nuclear@0: /*
nuclear@0: Open Asset Import Library (assimp)
nuclear@0: ----------------------------------------------------------------------
nuclear@0: 
nuclear@0: Copyright (c) 2006-2012, assimp team
nuclear@0: All rights reserved.
nuclear@0: 
nuclear@0: Redistribution and use of this software in source and binary forms, 
nuclear@0: with or without modification, are permitted provided that the 
nuclear@0: following conditions are met:
nuclear@0: 
nuclear@0: * Redistributions of source code must retain the above
nuclear@0:   copyright notice, this list of conditions and the
nuclear@0:   following disclaimer.
nuclear@0: 
nuclear@0: * Redistributions in binary form must reproduce the above
nuclear@0:   copyright notice, this list of conditions and the
nuclear@0:   following disclaimer in the documentation and/or other
nuclear@0:   materials provided with the distribution.
nuclear@0: 
nuclear@0: * Neither the name of the assimp team, nor the names of its
nuclear@0:   contributors may be used to endorse or promote products
nuclear@0:   derived from this software without specific prior
nuclear@0:   written permission of the assimp team.
nuclear@0: 
nuclear@0: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
nuclear@0: "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
nuclear@0: LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
nuclear@0: A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
nuclear@0: OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
nuclear@0: SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
nuclear@0: LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
nuclear@0: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
nuclear@0: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
nuclear@0: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
nuclear@0: OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
nuclear@0: 
nuclear@0: ----------------------------------------------------------------------
nuclear@0: */
nuclear@0: /** @file  FBXBinaryTokenizer.cpp
nuclear@0:  *  @brief Implementation of a fake lexer for binary fbx files -
nuclear@0:  *    we emit tokens so the parser needs almost no special handling
nuclear@0:  *    for binary files.
nuclear@0:  */
nuclear@0: #include "AssimpPCH.h"
nuclear@0: 
nuclear@0: #ifndef ASSIMP_BUILD_NO_FBX_IMPORTER
nuclear@0: 
nuclear@0: #include "FBXTokenizer.h"
nuclear@0: #include "FBXUtil.h"
nuclear@0: 
nuclear@0: namespace Assimp {
nuclear@0: namespace FBX {
nuclear@0: 
nuclear@0: 
nuclear@0: // ------------------------------------------------------------------------------------------------
nuclear@0: // Constructor used for tokens produced from binary files. The byte offset of
nuclear@0: // the token is stored in the 'line' member and 'column' is set to
nuclear@0: // BINARY_MARKER. In DEBUG builds the referenced bytes are additionally copied
nuclear@0: // into 'contents'.
nuclear@0: Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int offset)
nuclear@0: 	: sbegin(sbegin),
nuclear@0: 	  send(send),
nuclear@0: 	  type(type),
nuclear@0: 	  line(offset),
nuclear@0: 	  column(BINARY_MARKER)
nuclear@0: #ifdef DEBUG
nuclear@0: 	  , contents(sbegin, static_cast<size_t>(send-sbegin))
nuclear@0: #endif
nuclear@0: {
nuclear@0: 	// neither end of the range may be a null pointer
nuclear@0: 	ai_assert(sbegin != 0);
nuclear@0: 	ai_assert(send != 0);
nuclear@0: 
nuclear@0: 	// an empty range is accepted (binary tokens are sometimes zero-length
nuclear@0: 	// dummies inserted by TokenizeBinary()), a reversed range is not
nuclear@0: 	ai_assert(sbegin <= send);
nuclear@0: }
nuclear@0: 
nuclear@0: 
nuclear@0: namespace {
nuclear@0: 
nuclear@0: // ------------------------------------------------------------------------------------------------
nuclear@0: // Reports a tokenization failure; there is no way to recover from it.
nuclear@0: // The message is combined with the "FBX-Tokenize" prefix and the given offset
nuclear@0: // by Util::AddOffset() and thrown as a DeadlyImportError, so this function
nuclear@0: // never returns normally.
nuclear@0: void TokenizeError(const std::string& message, unsigned int offset)
nuclear@0: {
nuclear@0: 	const std::string formatted = Util::AddOffset("FBX-Tokenize", message, offset);
nuclear@0: 	throw DeadlyImportError(formatted);
nuclear@0: }
nuclear@0: 
nuclear@0: 
nuclear@0: // ------------------------------------------------------------------------------------------------
nuclear@0: // Returns the distance in bytes from 'begin' to 'cursor'.
nuclear@0: // 'cursor' must not lie in front of 'begin'; this is only verified by an
nuclear@0: // assertion.
nuclear@0: uint32_t Offset(const char* begin, const char* cursor)
nuclear@0: {
nuclear@0: 	ai_assert(cursor >= begin);
nuclear@0: 
nuclear@0: 	const unsigned int distance = static_cast<unsigned int>(cursor - begin);
nuclear@0: 	return distance;
nuclear@0: }
nuclear@0: 
nuclear@0: 
nuclear@0: // ------------------------------------------------------------------------------------------------
nuclear@0: // Convenience overload: reports a tokenization failure at the position of
nuclear@0: // 'cursor', expressed as its byte offset from 'begin'. Always throws.
nuclear@0: void TokenizeError(const std::string& message, const char* begin, const char* cursor)
nuclear@0: {
nuclear@0: 	const uint32_t position = Offset(begin, cursor);
nuclear@0: 	TokenizeError(message, position);
nuclear@0: }
nuclear@0: 
nuclear@0: 
nuclear@0: // ------------------------------------------------------------------------------------------------
nuclear@0: uint32_t ReadWord(const char* input, const char*& cursor, const char* end)
nuclear@0: {
nuclear@0: 	if(Offset(cursor, end) < 4) {
nuclear@0: 		TokenizeError("cannot ReadWord, out of bounds",input, cursor);
nuclear@0: 	} 
nuclear@0: 
nuclear@0: 	uint32_t word = *reinterpret_cast<const uint32_t*>(cursor);
nuclear@0: 	AI_SWAP4(word);
nuclear@0: 
nuclear@0: 	cursor += 4;
nuclear@0: 
nuclear@0: 	return word;
nuclear@0: }
nuclear@0: 
nuclear@0: 
nuclear@0: // ------------------------------------------------------------------------------------------------
nuclear@0: uint8_t ReadByte(const char* input, const char*& cursor, const char* end)
nuclear@0: {
nuclear@0: 	if(Offset(cursor, end) < 1) {
nuclear@0: 		TokenizeError("cannot ReadByte, out of bounds",input, cursor);
nuclear@0: 	} 
nuclear@0: 
nuclear@0: 	uint8_t word = *reinterpret_cast<const uint8_t*>(cursor);
nuclear@0: 	++cursor;
nuclear@0: 
nuclear@0: 	return word;
nuclear@0: }
nuclear@0: 
nuclear@0: 
nuclear@0: // ------------------------------------------------------------------------------------------------
nuclear@0: unsigned int ReadString(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end, 
nuclear@0: 	bool long_length = false,
nuclear@0: 	bool allow_null = false)
nuclear@0: {
nuclear@0: 	const uint32_t len_len = long_length ? 4 : 1;
nuclear@0: 	if(Offset(cursor, end) < len_len) {
nuclear@0: 		TokenizeError("cannot ReadString, out of bounds reading length",input, cursor);
nuclear@0: 	} 
nuclear@0: 
nuclear@0: 	const uint32_t length = long_length ? ReadWord(input, cursor, end) : ReadByte(input, cursor, end);
nuclear@0: 
nuclear@0: 	if (Offset(cursor, end) < length) {
nuclear@0: 		TokenizeError("cannot ReadString, length is out of bounds",input, cursor);
nuclear@0: 	}
nuclear@0: 
nuclear@0: 	sbegin_out = cursor;
nuclear@0: 	cursor += length;
nuclear@0: 
nuclear@0: 	send_out = cursor;
nuclear@0: 
nuclear@0: 	if(!allow_null) {
nuclear@0: 		for (unsigned int i = 0; i < length; ++i) {
nuclear@0: 			if(sbegin_out[i] == '\0') {
nuclear@0: 				TokenizeError("failed ReadString, unexpected NUL character in string",input, cursor);
nuclear@0: 			}
nuclear@0: 		}
nuclear@0: 	}
nuclear@0: 
nuclear@0: 	return length;
nuclear@0: }
nuclear@0: 
nuclear@0: 
nuclear@0: 
nuclear@0: // ------------------------------------------------------------------------------------------------
nuclear@0: void ReadData(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end)
nuclear@0: {
nuclear@0: 	if(Offset(cursor, end) < 1) {
nuclear@0: 		TokenizeError("cannot ReadData, out of bounds reading length",input, cursor);
nuclear@0: 	} 
nuclear@0: 
nuclear@0: 	const char type = *cursor;
nuclear@0: 	sbegin_out = cursor++;
nuclear@0: 
nuclear@0: 	switch(type)
nuclear@0: 	{
nuclear@0: 		// 16 bit int
nuclear@0: 	case 'Y':
nuclear@0: 		cursor += 2;
nuclear@0: 		break;
nuclear@0: 
nuclear@0: 		// 1 bit bool flag (yes/no)
nuclear@0: 	case 'C':
nuclear@0: 		cursor += 1;
nuclear@0: 		break;
nuclear@0: 
nuclear@0: 		// 32 bit int
nuclear@0: 	case 'I':
nuclear@0: 		// <- fall thru
nuclear@0: 
nuclear@0: 		// float
nuclear@0: 	case 'F':
nuclear@0: 		cursor += 4;
nuclear@0: 		break;
nuclear@0: 
nuclear@0: 		// double
nuclear@0: 	case 'D':
nuclear@0: 		cursor += 8;
nuclear@0: 		break;
nuclear@0: 
nuclear@0: 		// 64 bit int
nuclear@0: 	case 'L':
nuclear@0: 		cursor += 8;
nuclear@0: 		break;
nuclear@0: 
nuclear@0: 		// note: do not write cursor += ReadWord(...cursor) as this would be UB
nuclear@0: 
nuclear@0: 		// raw binary data
nuclear@0: 	case 'R':	
nuclear@0: 	{
nuclear@0: 		const uint32_t length = ReadWord(input, cursor, end);
nuclear@0: 		cursor += length;
nuclear@0: 		break;
nuclear@0: 	}
nuclear@0: 
nuclear@0: 	case 'b': 
nuclear@0: 		// TODO: what is the 'b' type code? Right now we just skip over it /
nuclear@0: 		// take the full range we could get
nuclear@0: 		cursor = end;
nuclear@0: 		break;
nuclear@0: 
nuclear@0: 		// array of *
nuclear@0: 	case 'f':
nuclear@0: 	case 'd':
nuclear@0: 	case 'l':
nuclear@0: 	case 'i':	{
nuclear@0: 	
nuclear@0: 		const uint32_t length = ReadWord(input, cursor, end);
nuclear@0: 		const uint32_t encoding = ReadWord(input, cursor, end);
nuclear@0: 
nuclear@0: 		const uint32_t comp_len = ReadWord(input, cursor, end);
nuclear@0: 
nuclear@0: 		// compute length based on type and check against the stored value
nuclear@0: 		if(encoding == 0) {
nuclear@0: 			uint32_t stride;
nuclear@0: 			switch(type)
nuclear@0: 			{
nuclear@0: 			case 'f':
nuclear@0: 			case 'i':
nuclear@0: 				stride = 4;
nuclear@0: 				break;
nuclear@0: 
nuclear@0: 			case 'd':
nuclear@0: 			case 'l':
nuclear@0: 				stride = 8;
nuclear@0: 				break;
nuclear@0: 
nuclear@0: 			default:
nuclear@0: 				ai_assert(false);
nuclear@0: 			};
nuclear@0: 			if(length * stride != comp_len) {
nuclear@0: 				TokenizeError("cannot ReadData, calculated data stride differs from what the file claims",input, cursor);
nuclear@0: 			}
nuclear@0: 		}
nuclear@0: 		// zip/deflate algorithm (encoding==1)? take given length. anything else? die
nuclear@0: 		else if (encoding != 1) {			
nuclear@0: 			TokenizeError("cannot ReadData, unknown encoding",input, cursor);
nuclear@0: 		}
nuclear@0: 		cursor += comp_len;
nuclear@0: 		break;
nuclear@0: 	}
nuclear@0: 
nuclear@0: 		// string
nuclear@0: 	case 'S': {
nuclear@0: 		const char* sb, *se;
nuclear@0: 		// 0 characters can legally happen in such strings
nuclear@0: 		ReadString(sb, se, input, cursor, end, true, true);
nuclear@0: 		break;
nuclear@0: 	}
nuclear@0: 	default:
nuclear@0: 		TokenizeError("cannot ReadData, unexpected type code: " + std::string(&type, 1),input, cursor);
nuclear@0: 	}
nuclear@0: 
nuclear@0: 	if(cursor > end) {
nuclear@0: 		TokenizeError("cannot ReadData, the remaining size is too small for the data type: " + std::string(&type, 1),input, cursor);
nuclear@0: 	} 
nuclear@0: 
nuclear@0: 	// the type code is contained in the returned range
nuclear@0: 	send_out = cursor;
nuclear@0: }
nuclear@0: 
nuclear@0: 
nuclear@0: // ------------------------------------------------------------------------------------------------
nuclear@0: // Reads one scope record (key, property list and optional nested scopes) and
nuclear@0: // appends the corresponding KEY / DATA / COMMA / bracket tokens to
nuclear@0: // 'output_tokens'.
nuclear@0: //   input   start of the file buffer; block end offsets are relative to it
nuclear@0: //   cursor  read position, moved to the end of the scope on success
nuclear@0: //   end     limit which the scope must not exceed
nuclear@0: // Returns false if the record starts with an end offset of 0 (only that word
nuclear@0: // has been consumed then), true if a scope was read. Throws DeadlyImportError
nuclear@0: // (through TokenizeError) if the record is malformed.
nuclear@0: // Nested scopes are read recursively; the nesting depth is not limited.
nuclear@0: bool ReadScope(TokenList& output_tokens, const char* input, const char*& cursor, const char* end)
nuclear@0: {
nuclear@0: 	// the first word contains the offset at which this block ends
nuclear@0: 	const uint32_t end_offset = ReadWord(input, cursor, end);
nuclear@0: 	
nuclear@0: 	// we may get 0 if reading reached the end of the file -
nuclear@0: 	// fbx files have a mysterious extra footer which I don't know 
nuclear@0: 	// how to extract any information from, but at least it always 
nuclear@0: 	// starts with a 0.
nuclear@0: 	if(!end_offset) {
nuclear@0: 		return false;
nuclear@0: 	}
nuclear@0: 
nuclear@0: 	if(end_offset > Offset(input, end)) {
nuclear@0: 		TokenizeError("block offset is out of range",input, cursor);
nuclear@0: 	}
nuclear@0: 	else if(end_offset < Offset(input, cursor)) {
nuclear@0: 		TokenizeError("block offset is negative out of range",input, cursor);
nuclear@0: 	}
nuclear@0: 
nuclear@0: 	// the second data word contains the number of properties in the scope
nuclear@0: 	const uint32_t prop_count = ReadWord(input, cursor, end);
nuclear@0: 
nuclear@0: 	// the third data word contains the length of the property list
nuclear@0: 	const uint32_t prop_length = ReadWord(input, cursor, end);
nuclear@0: 
nuclear@0: 	// now comes the name of the scope/key
nuclear@0: 	const char* sbeg, *send;
nuclear@0: 	ReadString(sbeg, send, input, cursor, end);
nuclear@0: 
nuclear@0: 	output_tokens.push_back(new_Token(sbeg, send, TokenType_KEY, Offset(input, cursor) ));
nuclear@0: 
nuclear@0: 	// now come the individual properties
nuclear@0: 	const char* begin_cursor = cursor;
nuclear@0: 
nuclear@0: 	// prop_length comes straight from the file. It serves as the limit for
nuclear@0: 	// ReadData() below, so it has to be confined to the available bytes first -
nuclear@0: 	// otherwise ReadData() would check against a limit outside of the buffer.
nuclear@0: 	if (prop_length > Offset(cursor, end)) {
nuclear@0: 		TokenizeError("property length out of bounds reading length",input, cursor);
nuclear@0: 	}
nuclear@0: 	const char* const prop_end = begin_cursor + prop_length;
nuclear@0: 
nuclear@0: 	for (unsigned int i = 0; i < prop_count; ++i) {
nuclear@0: 		ReadData(sbeg, send, input, cursor, prop_end);
nuclear@0: 
nuclear@0: 		output_tokens.push_back(new_Token(sbeg, send, TokenType_DATA, Offset(input, cursor) ));
nuclear@0: 
nuclear@0: 		// properties are separated by commas, there is none after the last one
nuclear@0: 		if(i != prop_count-1) {
nuclear@0: 			output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_COMMA, Offset(input, cursor) ));
nuclear@0: 		}
nuclear@0: 	}
nuclear@0: 
nuclear@0: 	if (Offset(begin_cursor, cursor) != prop_length) {
nuclear@0: 		TokenizeError("property length not reached, something is wrong",input, cursor);
nuclear@0: 	}
nuclear@0: 
nuclear@0: 	// at the end of each nested block, there is a NUL record to indicate
nuclear@0: 	// that the sub-scope exists (i.e. to distinguish between P: and P : {})
nuclear@0: 	// this NUL record is 13 bytes long.
nuclear@0: 	const uint32_t sentinel_length = 13;
nuclear@0: 
nuclear@0: 	if (Offset(input, cursor) < end_offset) {
nuclear@0: 
nuclear@0: 		// also guarantees end_offset >= sentinel_length for the subtractions below
nuclear@0: 		if (end_offset - Offset(input, cursor) < sentinel_length) {
nuclear@0: 			TokenizeError("insufficient padding bytes at block end",input, cursor);
nuclear@0: 		}
nuclear@0: 
nuclear@0: 		output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_OPEN_BRACKET, Offset(input, cursor) ));
nuclear@0: 
nuclear@0: 		// XXX this is vulnerable to stack overflowing ..
nuclear@0: 		// nested scopes may use everything up to the sentinel of this block
nuclear@0: 		const uint32_t nested_end_offset = end_offset - sentinel_length;
nuclear@0: 		while(Offset(input, cursor) < nested_end_offset) {
nuclear@0: 			ReadScope(output_tokens, input, cursor, input + nested_end_offset);
nuclear@0: 		}
nuclear@0: 		output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_CLOSE_BRACKET, Offset(input, cursor) ));
nuclear@0: 
nuclear@0: 		for (unsigned int i = 0; i < sentinel_length; ++i) {
nuclear@0: 			if(cursor[i] != '\0') {
nuclear@0: 				TokenizeError("failed to read nested block sentinel, expected all bytes to be 0",input, cursor);
nuclear@0: 			}
nuclear@0: 		}
nuclear@0: 		cursor += sentinel_length;
nuclear@0: 	}
nuclear@0: 
nuclear@0: 	if (Offset(input, cursor) != end_offset) {
nuclear@0: 		TokenizeError("scope length not reached, something is wrong",input, cursor);
nuclear@0: 	}
nuclear@0: 
nuclear@0: 	return true;
nuclear@0: }
nuclear@0: 
nuclear@0: 
nuclear@0: }
nuclear@0: 
nuclear@0: // ------------------------------------------------------------------------------------------------
nuclear@0: // Entry point of the binary tokenizer: turns the 'length' bytes at 'input' into
nuclear@0: // tokens, which are appended to 'output_tokens'.
nuclear@0: // The buffer has to start with the "Kaydara FBX Binary" magic. The first 0x1b
nuclear@0: // bytes are skipped as file header, then top-level scopes are read until the
nuclear@0: // end of the buffer is reached or ReadScope() returns false.
nuclear@0: // Throws DeadlyImportError (through TokenizeError) if the buffer is too short,
nuclear@0: // the magic is missing, or a scope is malformed.
nuclear@0: void TokenizeBinary(TokenList& output_tokens, const char* input, unsigned int length)
nuclear@0: {
nuclear@0: 	ai_assert(input);
nuclear@0: 
nuclear@0: 	// number of bytes in front of the first scope
nuclear@0: 	const unsigned int header_size = 0x1b;
nuclear@0: 
nuclear@0: 	if(length < header_size) {
nuclear@0: 		TokenizeError("file is too short",0);
nuclear@0: 	}
nuclear@0: 
nuclear@0: 	if (strncmp(input,"Kaydara FBX Binary",18) != 0) {
nuclear@0: 		TokenizeError("magic bytes not found",0);
nuclear@0: 	}
nuclear@0: 
nuclear@0: 	const char* const end = input + length;
nuclear@0: 	const char* cursor = input + header_size;
nuclear@0: 
nuclear@0: 	// all the work happens in ReadScope(), which also advances the cursor
nuclear@0: 	while (cursor < end && ReadScope(output_tokens, input, cursor, end)) {
nuclear@0: 	}
nuclear@0: }
nuclear@0: 
nuclear@0: } // !FBX
nuclear@0: } // !Assimp
nuclear@0: 
nuclear@0: #endif