nuclear@0: /* nuclear@0: Open Asset Import Library (assimp) nuclear@0: ---------------------------------------------------------------------- nuclear@0: nuclear@0: Copyright (c) 2006-2012, assimp team nuclear@0: All rights reserved. nuclear@0: nuclear@0: Redistribution and use of this software in source and binary forms, nuclear@0: with or without modification, are permitted provided that the nuclear@0: following conditions are met: nuclear@0: nuclear@0: * Redistributions of source code must retain the above nuclear@0: copyright notice, this list of conditions and the nuclear@0: following disclaimer. nuclear@0: nuclear@0: * Redistributions in binary form must reproduce the above nuclear@0: copyright notice, this list of conditions and the nuclear@0: following disclaimer in the documentation and/or other nuclear@0: materials provided with the distribution. nuclear@0: nuclear@0: * Neither the name of the assimp team, nor the names of its nuclear@0: contributors may be used to endorse or promote products nuclear@0: derived from this software without specific prior nuclear@0: written permission of the assimp team. nuclear@0: nuclear@0: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS nuclear@0: "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT nuclear@0: LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR nuclear@0: A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT nuclear@0: OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, nuclear@0: SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT nuclear@0: LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, nuclear@0: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY nuclear@0: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT nuclear@0: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE nuclear@0: OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. nuclear@0: nuclear@0: ---------------------------------------------------------------------- nuclear@0: */ nuclear@0: /** @file FBXBinaryTokenizer.cpp nuclear@0: * @brief Implementation of a fake lexer for binary fbx files - nuclear@0: * we emit tokens so the parser needs almost no special handling nuclear@0: * for binary files. nuclear@0: */ nuclear@0: #include "AssimpPCH.h" nuclear@0: nuclear@0: #ifndef ASSIMP_BUILD_NO_FBX_IMPORTER nuclear@0: nuclear@0: #include "FBXTokenizer.h" nuclear@0: #include "FBXUtil.h" nuclear@0: nuclear@0: namespace Assimp { nuclear@0: namespace FBX { nuclear@0: nuclear@0: nuclear@0: // ------------------------------------------------------------------------------------------------ nuclear@0: Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int offset) nuclear@0: : sbegin(sbegin) nuclear@0: , send(send) nuclear@0: , type(type) nuclear@0: , line(offset) nuclear@0: , column(BINARY_MARKER) nuclear@0: #ifdef DEBUG nuclear@0: , contents(sbegin, static_cast(send-sbegin)) nuclear@0: #endif nuclear@0: { nuclear@0: ai_assert(sbegin); nuclear@0: ai_assert(send); nuclear@0: nuclear@0: // binary tokens may have zero length because they are sometimes dummies nuclear@0: // inserted by TokenizeBinary() nuclear@0: ai_assert(send >= sbegin); nuclear@0: } nuclear@0: nuclear@0: nuclear@0: namespace { nuclear@0: nuclear@0: // ------------------------------------------------------------------------------------------------ nuclear@0: // signal tokenization error, this is always unrecoverable. Throws DeadlyImportError. nuclear@0: void TokenizeError(const std::string& message, unsigned int offset) nuclear@0: { nuclear@0: throw DeadlyImportError(Util::AddOffset("FBX-Tokenize",message,offset)); nuclear@0: } nuclear@0: nuclear@0: nuclear@0: // ------------------------------------------------------------------------------------------------ nuclear@0: uint32_t Offset(const char* begin, const char* cursor) nuclear@0: { nuclear@0: ai_assert(begin <= cursor); nuclear@0: return static_cast(cursor - begin); nuclear@0: } nuclear@0: nuclear@0: nuclear@0: // ------------------------------------------------------------------------------------------------ nuclear@0: void TokenizeError(const std::string& message, const char* begin, const char* cursor) nuclear@0: { nuclear@0: TokenizeError(message, Offset(begin, cursor)); nuclear@0: } nuclear@0: nuclear@0: nuclear@0: // ------------------------------------------------------------------------------------------------ nuclear@0: uint32_t ReadWord(const char* input, const char*& cursor, const char* end) nuclear@0: { nuclear@0: if(Offset(cursor, end) < 4) { nuclear@0: TokenizeError("cannot ReadWord, out of bounds",input, cursor); nuclear@0: } nuclear@0: nuclear@0: uint32_t word = *reinterpret_cast(cursor); nuclear@0: AI_SWAP4(word); nuclear@0: nuclear@0: cursor += 4; nuclear@0: nuclear@0: return word; nuclear@0: } nuclear@0: nuclear@0: nuclear@0: // ------------------------------------------------------------------------------------------------ nuclear@0: uint8_t ReadByte(const char* input, const char*& cursor, const char* end) nuclear@0: { nuclear@0: if(Offset(cursor, end) < 1) { nuclear@0: TokenizeError("cannot ReadByte, out of bounds",input, cursor); nuclear@0: } nuclear@0: nuclear@0: uint8_t word = *reinterpret_cast(cursor); nuclear@0: ++cursor; nuclear@0: nuclear@0: return word; nuclear@0: } nuclear@0: nuclear@0: nuclear@0: // ------------------------------------------------------------------------------------------------ nuclear@0: unsigned int ReadString(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end, nuclear@0: bool long_length = false, nuclear@0: bool allow_null = false) nuclear@0: { nuclear@0: const uint32_t len_len = long_length ? 4 : 1; nuclear@0: if(Offset(cursor, end) < len_len) { nuclear@0: TokenizeError("cannot ReadString, out of bounds reading length",input, cursor); nuclear@0: } nuclear@0: nuclear@0: const uint32_t length = long_length ? ReadWord(input, cursor, end) : ReadByte(input, cursor, end); nuclear@0: nuclear@0: if (Offset(cursor, end) < length) { nuclear@0: TokenizeError("cannot ReadString, length is out of bounds",input, cursor); nuclear@0: } nuclear@0: nuclear@0: sbegin_out = cursor; nuclear@0: cursor += length; nuclear@0: nuclear@0: send_out = cursor; nuclear@0: nuclear@0: if(!allow_null) { nuclear@0: for (unsigned int i = 0; i < length; ++i) { nuclear@0: if(sbegin_out[i] == '\0') { nuclear@0: TokenizeError("failed ReadString, unexpected NUL character in string",input, cursor); nuclear@0: } nuclear@0: } nuclear@0: } nuclear@0: nuclear@0: return length; nuclear@0: } nuclear@0: nuclear@0: nuclear@0: nuclear@0: // ------------------------------------------------------------------------------------------------ nuclear@0: void ReadData(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end) nuclear@0: { nuclear@0: if(Offset(cursor, end) < 1) { nuclear@0: TokenizeError("cannot ReadData, out of bounds reading length",input, cursor); nuclear@0: } nuclear@0: nuclear@0: const char type = *cursor; nuclear@0: sbegin_out = cursor++; nuclear@0: nuclear@0: switch(type) nuclear@0: { nuclear@0: // 16 bit int nuclear@0: case 'Y': nuclear@0: cursor += 2; nuclear@0: break; nuclear@0: nuclear@0: // 1 bit bool flag (yes/no) nuclear@0: case 'C': nuclear@0: cursor += 1; nuclear@0: break; nuclear@0: nuclear@0: // 32 bit int nuclear@0: case 'I': nuclear@0: // <- fall thru nuclear@0: nuclear@0: // float nuclear@0: case 'F': nuclear@0: cursor += 4; nuclear@0: break; nuclear@0: nuclear@0: // double nuclear@0: case 'D': nuclear@0: cursor += 8; nuclear@0: break; nuclear@0: nuclear@0: // 64 bit int nuclear@0: case 'L': nuclear@0: cursor += 8; nuclear@0: break; nuclear@0: nuclear@0: // note: do not write cursor += ReadWord(...cursor) as this would be UB nuclear@0: nuclear@0: // raw binary data nuclear@0: case 'R': nuclear@0: { nuclear@0: const uint32_t length = ReadWord(input, cursor, end); nuclear@0: cursor += length; nuclear@0: break; nuclear@0: } nuclear@0: nuclear@0: case 'b': nuclear@0: // TODO: what is the 'b' type code? Right now we just skip over it / nuclear@0: // take the full range we could get nuclear@0: cursor = end; nuclear@0: break; nuclear@0: nuclear@0: // array of * nuclear@0: case 'f': nuclear@0: case 'd': nuclear@0: case 'l': nuclear@0: case 'i': { nuclear@0: nuclear@0: const uint32_t length = ReadWord(input, cursor, end); nuclear@0: const uint32_t encoding = ReadWord(input, cursor, end); nuclear@0: nuclear@0: const uint32_t comp_len = ReadWord(input, cursor, end); nuclear@0: nuclear@0: // compute length based on type and check against the stored value nuclear@0: if(encoding == 0) { nuclear@0: uint32_t stride; nuclear@0: switch(type) nuclear@0: { nuclear@0: case 'f': nuclear@0: case 'i': nuclear@0: stride = 4; nuclear@0: break; nuclear@0: nuclear@0: case 'd': nuclear@0: case 'l': nuclear@0: stride = 8; nuclear@0: break; nuclear@0: nuclear@0: default: nuclear@0: ai_assert(false); nuclear@0: }; nuclear@0: if(length * stride != comp_len) { nuclear@0: TokenizeError("cannot ReadData, calculated data stride differs from what the file claims",input, cursor); nuclear@0: } nuclear@0: } nuclear@0: // zip/deflate algorithm (encoding==1)? take given length. anything else? die nuclear@0: else if (encoding != 1) { nuclear@0: TokenizeError("cannot ReadData, unknown encoding",input, cursor); nuclear@0: } nuclear@0: cursor += comp_len; nuclear@0: break; nuclear@0: } nuclear@0: nuclear@0: // string nuclear@0: case 'S': { nuclear@0: const char* sb, *se; nuclear@0: // 0 characters can legally happen in such strings nuclear@0: ReadString(sb, se, input, cursor, end, true, true); nuclear@0: break; nuclear@0: } nuclear@0: default: nuclear@0: TokenizeError("cannot ReadData, unexpected type code: " + std::string(&type, 1),input, cursor); nuclear@0: } nuclear@0: nuclear@0: if(cursor > end) { nuclear@0: TokenizeError("cannot ReadData, the remaining size is too small for the data type: " + std::string(&type, 1),input, cursor); nuclear@0: } nuclear@0: nuclear@0: // the type code is contained in the returned range nuclear@0: send_out = cursor; nuclear@0: } nuclear@0: nuclear@0: nuclear@0: // ------------------------------------------------------------------------------------------------ nuclear@0: bool ReadScope(TokenList& output_tokens, const char* input, const char*& cursor, const char* end) nuclear@0: { nuclear@0: // the first word contains the offset at which this block ends nuclear@0: const uint32_t end_offset = ReadWord(input, cursor, end); nuclear@0: nuclear@0: // we may get 0 if reading reached the end of the file - nuclear@0: // fbx files have a mysterious extra footer which I don't know nuclear@0: // how to extract any information from, but at least it always nuclear@0: // starts with a 0. nuclear@0: if(!end_offset) { nuclear@0: return false; nuclear@0: } nuclear@0: nuclear@0: if(end_offset > Offset(input, end)) { nuclear@0: TokenizeError("block offset is out of range",input, cursor); nuclear@0: } nuclear@0: else if(end_offset < Offset(input, cursor)) { nuclear@0: TokenizeError("block offset is negative out of range",input, cursor); nuclear@0: } nuclear@0: nuclear@0: // the second data word contains the number of properties in the scope nuclear@0: const uint32_t prop_count = ReadWord(input, cursor, end); nuclear@0: nuclear@0: // the third data word contains the length of the property list nuclear@0: const uint32_t prop_length = ReadWord(input, cursor, end); nuclear@0: nuclear@0: // now comes the name of the scope/key nuclear@0: const char* sbeg, *send; nuclear@0: ReadString(sbeg, send, input, cursor, end); nuclear@0: nuclear@0: output_tokens.push_back(new_Token(sbeg, send, TokenType_KEY, Offset(input, cursor) )); nuclear@0: nuclear@0: // now come the individual properties nuclear@0: const char* begin_cursor = cursor; nuclear@0: for (unsigned int i = 0; i < prop_count; ++i) { nuclear@0: ReadData(sbeg, send, input, cursor, begin_cursor + prop_length); nuclear@0: nuclear@0: output_tokens.push_back(new_Token(sbeg, send, TokenType_DATA, Offset(input, cursor) )); nuclear@0: nuclear@0: if(i != prop_count-1) { nuclear@0: output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_COMMA, Offset(input, cursor) )); nuclear@0: } nuclear@0: } nuclear@0: nuclear@0: if (Offset(begin_cursor, cursor) != prop_length) { nuclear@0: TokenizeError("property length not reached, something is wrong",input, cursor); nuclear@0: } nuclear@0: nuclear@0: // at the end of each nested block, there is a NUL record to indicate nuclear@0: // that the sub-scope exists (i.e. to distinguish between P: and P : {}) nuclear@0: // this NUL record is 13 bytes long. nuclear@0: #define BLOCK_SENTINEL_LENGTH 13 nuclear@0: nuclear@0: if (Offset(input, cursor) < end_offset) { nuclear@0: nuclear@0: if (end_offset - Offset(input, cursor) < BLOCK_SENTINEL_LENGTH) { nuclear@0: TokenizeError("insufficient padding bytes at block end",input, cursor); nuclear@0: } nuclear@0: nuclear@0: output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_OPEN_BRACKET, Offset(input, cursor) )); nuclear@0: nuclear@0: // XXX this is vulnerable to stack overflowing .. nuclear@0: while(Offset(input, cursor) < end_offset - BLOCK_SENTINEL_LENGTH) { nuclear@0: ReadScope(output_tokens, input, cursor, input + end_offset - BLOCK_SENTINEL_LENGTH); nuclear@0: } nuclear@0: output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_CLOSE_BRACKET, Offset(input, cursor) )); nuclear@0: nuclear@0: for (unsigned int i = 0; i < BLOCK_SENTINEL_LENGTH; ++i) { nuclear@0: if(cursor[i] != '\0') { nuclear@0: TokenizeError("failed to read nested block sentinel, expected all bytes to be 0",input, cursor); nuclear@0: } nuclear@0: } nuclear@0: cursor += BLOCK_SENTINEL_LENGTH; nuclear@0: } nuclear@0: nuclear@0: if (Offset(input, cursor) != end_offset) { nuclear@0: TokenizeError("scope length not reached, something is wrong",input, cursor); nuclear@0: } nuclear@0: nuclear@0: return true; nuclear@0: } nuclear@0: nuclear@0: nuclear@0: } nuclear@0: nuclear@0: // ------------------------------------------------------------------------------------------------ nuclear@0: void TokenizeBinary(TokenList& output_tokens, const char* input, unsigned int length) nuclear@0: { nuclear@0: ai_assert(input); nuclear@0: nuclear@0: if(length < 0x1b) { nuclear@0: TokenizeError("file is too short",0); nuclear@0: } nuclear@0: nuclear@0: if (strncmp(input,"Kaydara FBX Binary",18)) { nuclear@0: TokenizeError("magic bytes not found",0); nuclear@0: } nuclear@0: nuclear@0: nuclear@0: //uint32_t offset = 0x1b; nuclear@0: nuclear@0: const char* cursor = input + 0x1b; nuclear@0: nuclear@0: while (cursor < input + length) { nuclear@0: if(!ReadScope(output_tokens, input, cursor, input + length)) { nuclear@0: break; nuclear@0: } nuclear@0: } nuclear@0: } nuclear@0: nuclear@0: } // !FBX nuclear@0: } // !Assimp nuclear@0: nuclear@0: #endif