vrshoot: libs/assimp/FBXBinaryTokenizer.cpp @ 0:b2f14e535253
commit: initial commit
author: John Tsiombikas <nuclear@member.fsf.org>
date: Sat, 01 Feb 2014 19:58:19 +0200
/*
Open Asset Import Library (assimp)
----------------------------------------------------------------------

Copyright (c) 2006-2012, assimp team
All rights reserved.

Redistribution and use of this software in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:

* Redistributions of source code must retain the above
  copyright notice, this list of conditions and the
  following disclaimer.

* Redistributions in binary form must reproduce the above
  copyright notice, this list of conditions and the
  following disclaimer in the documentation and/or other
  materials provided with the distribution.

* Neither the name of the assimp team, nor the names of its
  contributors may be used to endorse or promote products
  derived from this software without specific prior
  written permission of the assimp team.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

----------------------------------------------------------------------
*/
/** @file  FBXBinaryTokenizer.cpp
 *  @brief Implementation of a fake lexer for binary fbx files -
 *    we emit tokens so the parser needs almost no special handling
 *    for binary files.
 */
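
// The tokenizer below produces the same kind of token stream as the text
// format tokenizer: TokenType_KEY for record names, TokenType_DATA for
// property values, TokenType_COMMA between properties and
// TokenType_OPEN_BRACKET / TokenType_CLOSE_BRACKET around nested scopes.
// Binary files have no meaningful line/column positions, so tokens record
// byte offsets into the input buffer instead.
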
#include "AssimpPCH.h"

#ifndef ASSIMP_BUILD_NO_FBX_IMPORTER

#include "FBXTokenizer.h"
#include "FBXUtil.h"

namespace Assimp {
namespace FBX {


// ------------------------------------------------------------------------------------------------
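// Token constructor used for binary files: there is no line/column
// information, so the byte offset of the token is stored in the 'line'
// member and 'column' is set to the BINARY_MARKER sentinel value.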
Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int offset)
    : sbegin(sbegin)
    , send(send)
    , type(type)
    , line(offset)
    , column(BINARY_MARKER)
#ifdef DEBUG
    , contents(sbegin, static_cast<size_t>(send-sbegin))
#endif
{
    ai_assert(sbegin);
    ai_assert(send);

    // binary tokens may have zero length because they are sometimes dummies
    // inserted by TokenizeBinary()
    ai_assert(send >= sbegin);
}

namespace {

// ------------------------------------------------------------------------------------------------
// signal tokenization error, this is always unrecoverable. Throws DeadlyImportError.
void TokenizeError(const std::string& message, unsigned int offset)
{
    throw DeadlyImportError(Util::AddOffset("FBX-Tokenize",message,offset));
}


// ------------------------------------------------------------------------------------------------
uint32_t Offset(const char* begin, const char* cursor)
{
    ai_assert(begin <= cursor);
    return static_cast<unsigned int>(cursor - begin);
}


// ------------------------------------------------------------------------------------------------
void TokenizeError(const std::string& message, const char* begin, const char* cursor)
{
    TokenizeError(message, Offset(begin, cursor));
}


// ------------------------------------------------------------------------------------------------
uint32_t ReadWord(const char* input, const char*& cursor, const char* end)
{
    if(Offset(cursor, end) < 4) {
        TokenizeError("cannot ReadWord, out of bounds",input, cursor);
    }

    uint32_t word = *reinterpret_cast<const uint32_t*>(cursor);
    AI_SWAP4(word);

    cursor += 4;

    return word;
}


// ------------------------------------------------------------------------------------------------
uint8_t ReadByte(const char* input, const char*& cursor, const char* end)
{
    if(Offset(cursor, end) < 1) {
        TokenizeError("cannot ReadByte, out of bounds",input, cursor);
    }

    uint8_t word = *reinterpret_cast<const uint8_t*>(cursor);
    ++cursor;

    return word;
}


// ------------------------------------------------------------------------------------------------
unsigned int ReadString(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end,
    bool long_length = false,
    bool allow_null = false)
{
    const uint32_t len_len = long_length ? 4 : 1;
    if(Offset(cursor, end) < len_len) {
        TokenizeError("cannot ReadString, out of bounds reading length",input, cursor);
    }

    const uint32_t length = long_length ? ReadWord(input, cursor, end) : ReadByte(input, cursor, end);

    if (Offset(cursor, end) < length) {
        TokenizeError("cannot ReadString, length is out of bounds",input, cursor);
    }

    sbegin_out = cursor;
    cursor += length;

    send_out = cursor;

    if(!allow_null) {
        for (unsigned int i = 0; i < length; ++i) {
            if(sbegin_out[i] == '\0') {
                TokenizeError("failed ReadString, unexpected NUL character in string",input, cursor);
            }
        }
    }

    return length;
}


// ------------------------------------------------------------------------------------------------
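// Read a single property record. Layout (as handled below): a one-character
// type code followed by the payload. 'Y' is a 16 bit int, 'C' a boolean
// byte, 'I'/'F' take 4 bytes, 'D'/'L' take 8 bytes, 'R' is a raw block and
// 'S' a string, both prefixed with a 32 bit length. The lowercase array
// codes 'f','d','l','i' start with three 32 bit words: element count,
// encoding (0 = plain, 1 = zlib/deflate) and the byte length of the
// (possibly compressed) data that follows.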
void ReadData(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end)
{
    if(Offset(cursor, end) < 1) {
        TokenizeError("cannot ReadData, out of bounds reading length",input, cursor);
    }

    const char type = *cursor;
    sbegin_out = cursor++;

    switch(type)
    {
        // 16 bit int
    case 'Y':
        cursor += 2;
        break;

        // 1 bit bool flag (yes/no)
    case 'C':
        cursor += 1;
        break;

        // 32 bit int
    case 'I':
        // <- fall thru

        // float
    case 'F':
        cursor += 4;
        break;

        // double
    case 'D':
        cursor += 8;
        break;

        // 64 bit int
    case 'L':
        cursor += 8;
        break;

        // note: do not write cursor += ReadWord(...cursor) as this would be UB

        // raw binary data
    case 'R':
    {
        const uint32_t length = ReadWord(input, cursor, end);
        cursor += length;
        break;
    }

    case 'b':
        // TODO: what is the 'b' type code? Right now we just skip over it /
        // take the full range we could get
        cursor = end;
        break;

        // array of *
    case 'f':
    case 'd':
    case 'l':
    case 'i': {

        const uint32_t length = ReadWord(input, cursor, end);
        const uint32_t encoding = ReadWord(input, cursor, end);

        const uint32_t comp_len = ReadWord(input, cursor, end);

        // compute length based on type and check against the stored value
        if(encoding == 0) {
            uint32_t stride;
            switch(type)
            {
            case 'f':
            case 'i':
                stride = 4;
                break;

            case 'd':
            case 'l':
                stride = 8;
                break;

            default:
                ai_assert(false);
            };
            if(length * stride != comp_len) {
                TokenizeError("cannot ReadData, calculated data stride differs from what the file claims",input, cursor);
            }
        }
        // zip/deflate algorithm (encoding==1)? take given length. anything else? die
        else if (encoding != 1) {
            TokenizeError("cannot ReadData, unknown encoding",input, cursor);
        }
        cursor += comp_len;
        break;
    }

        // string
    case 'S': {
        const char* sb, *se;
        // 0 characters can legally happen in such strings
        ReadString(sb, se, input, cursor, end, true, true);
        break;
    }
    default:
        TokenizeError("cannot ReadData, unexpected type code: " + std::string(&type, 1),input, cursor);
    }

    if(cursor > end) {
        TokenizeError("cannot ReadData, the remaining size is too small for the data type: " + std::string(&type, 1),input, cursor);
    }

    // the type code is contained in the returned range
    send_out = cursor;
}


// ------------------------------------------------------------------------------------------------
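// Read one nested record ("scope"). Layout (as handled below): a 32 bit
// absolute end offset of the record, a 32 bit property count, a 32 bit byte
// length of the property list, a length-prefixed name, then the property
// records themselves, optionally followed by nested records and a 13 byte
// all-zero sentinel that terminates the nested block.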
bool ReadScope(TokenList& output_tokens, const char* input, const char*& cursor, const char* end)
{
    // the first word contains the offset at which this block ends
    const uint32_t end_offset = ReadWord(input, cursor, end);

    // we may get 0 if reading reached the end of the file -
    // fbx files have a mysterious extra footer which I don't know
    // how to extract any information from, but at least it always
    // starts with a 0.
    if(!end_offset) {
        return false;
    }

    if(end_offset > Offset(input, end)) {
        TokenizeError("block offset is out of range",input, cursor);
    }
    else if(end_offset < Offset(input, cursor)) {
        TokenizeError("block offset is negative out of range",input, cursor);
    }

    // the second data word contains the number of properties in the scope
    const uint32_t prop_count = ReadWord(input, cursor, end);

    // the third data word contains the length of the property list
    const uint32_t prop_length = ReadWord(input, cursor, end);

    // now comes the name of the scope/key
    const char* sbeg, *send;
    ReadString(sbeg, send, input, cursor, end);

    output_tokens.push_back(new_Token(sbeg, send, TokenType_KEY, Offset(input, cursor) ));

    // now come the individual properties
    const char* begin_cursor = cursor;
    for (unsigned int i = 0; i < prop_count; ++i) {
        ReadData(sbeg, send, input, cursor, begin_cursor + prop_length);

        output_tokens.push_back(new_Token(sbeg, send, TokenType_DATA, Offset(input, cursor) ));

        if(i != prop_count-1) {
            output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_COMMA, Offset(input, cursor) ));
        }
    }

    if (Offset(begin_cursor, cursor) != prop_length) {
        TokenizeError("property length not reached, something is wrong",input, cursor);
    }

    // at the end of each nested block, there is a NUL record to indicate
    // that the sub-scope exists (i.e. to distinguish between P: and P : {})
    // this NUL record is 13 bytes long.
#define BLOCK_SENTINEL_LENGTH 13

    if (Offset(input, cursor) < end_offset) {

        if (end_offset - Offset(input, cursor) < BLOCK_SENTINEL_LENGTH) {
            TokenizeError("insufficient padding bytes at block end",input, cursor);
        }

        output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_OPEN_BRACKET, Offset(input, cursor) ));

        // XXX this is vulnerable to stack overflowing ..
        while(Offset(input, cursor) < end_offset - BLOCK_SENTINEL_LENGTH) {
            ReadScope(output_tokens, input, cursor, input + end_offset - BLOCK_SENTINEL_LENGTH);
        }
        output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_CLOSE_BRACKET, Offset(input, cursor) ));

        for (unsigned int i = 0; i < BLOCK_SENTINEL_LENGTH; ++i) {
            if(cursor[i] != '\0') {
                TokenizeError("failed to read nested block sentinel, expected all bytes to be 0",input, cursor);
            }
        }
        cursor += BLOCK_SENTINEL_LENGTH;
    }

    if (Offset(input, cursor) != end_offset) {
        TokenizeError("scope length not reached, something is wrong",input, cursor);
    }

    return true;
}

}

// ------------------------------------------------------------------------------------------------
void TokenizeBinary(TokenList& output_tokens, const char* input, unsigned int length)
{
    ai_assert(input);

    if(length < 0x1b) {
        TokenizeError("file is too short",0);
    }

    if (strncmp(input,"Kaydara FBX Binary",18)) {
        TokenizeError("magic bytes not found",0);
    }


    //uint32_t offset = 0x1b;
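
    // 0x1b == 27: the binary header is 27 bytes long. Besides the magic
    // string checked above it also seems to contain the file's FBX version
    // number, which this tokenizer does not evaluate and simply skips.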
    const char* cursor = input + 0x1b;

    while (cursor < input + length) {
        if(!ReadScope(output_tokens, input, cursor, input + length)) {
            break;
        }
    }
}
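
// Usage sketch (illustrative, not part of this file): a caller would
// typically read the whole .fbx file into a contiguous buffer and pass it
// in, e.g.
//
//   std::vector<char> buffer = /* entire file contents */;
//   TokenList tokens;
//   TokenizeBinary(tokens, &buffer[0], static_cast<unsigned int>(buffer.size()));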

} // !FBX
} // !Assimp

#endif