vrshoot

diff libs/assimp/FBXBinaryTokenizer.cpp @ 0:b2f14e535253

initial commit
author John Tsiombikas <nuclear@member.fsf.org>
date Sat, 01 Feb 2014 19:58:19 +0200
parents
children
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/libs/assimp/FBXBinaryTokenizer.cpp	Sat Feb 01 19:58:19 2014 +0200
     1.3 @@ -0,0 +1,397 @@
     1.4 +/*
     1.5 +Open Asset Import Library (assimp)
     1.6 +----------------------------------------------------------------------
     1.7 +
     1.8 +Copyright (c) 2006-2012, assimp team
     1.9 +All rights reserved.
    1.10 +
    1.11 +Redistribution and use of this software in source and binary forms, 
    1.12 +with or without modification, are permitted provided that the 
    1.13 +following conditions are met:
    1.14 +
    1.15 +* Redistributions of source code must retain the above
    1.16 +  copyright notice, this list of conditions and the
    1.17 +  following disclaimer.
    1.18 +
    1.19 +* Redistributions in binary form must reproduce the above
    1.20 +  copyright notice, this list of conditions and the
    1.21 +  following disclaimer in the documentation and/or other
    1.22 +  materials provided with the distribution.
    1.23 +
    1.24 +* Neither the name of the assimp team, nor the names of its
    1.25 +  contributors may be used to endorse or promote products
    1.26 +  derived from this software without specific prior
    1.27 +  written permission of the assimp team.
    1.28 +
    1.29 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
    1.30 +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
    1.31 +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    1.32 +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
    1.33 +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    1.34 +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
    1.35 +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    1.36 +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
    1.37 +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
    1.38 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
    1.39 +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    1.40 +
    1.41 +----------------------------------------------------------------------
    1.42 +*/
    1.43 +/** @file  FBXBinaryTokenizer.cpp
    1.44 + *  @brief Implementation of a fake lexer for binary fbx files -
    1.45 + *    we emit tokens so the parser needs almost no special handling
    1.46 + *    for binary files.
    1.47 + */
#include "AssimpPCH.h"

#include <cstring>

#ifndef ASSIMP_BUILD_NO_FBX_IMPORTER

#include "FBXTokenizer.h"
#include "FBXUtil.h"
    1.54 +
    1.55 +namespace Assimp {
    1.56 +namespace FBX {
    1.57 +
    1.58 +
// ------------------------------------------------------------------------------------------------
// Construct a token spanning the byte range [sbegin, send) of a binary FBX file.
// Binary files have no meaningful line/column pair, so the absolute file offset
// is stored in the 'line' member and 'column' is set to the BINARY_MARKER
// sentinel so consumers can tell the token came from a binary stream.
Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int offset)
	: sbegin(sbegin)
	, send(send)
	, type(type)
	, line(offset)
	, column(BINARY_MARKER)
#ifdef DEBUG
	// debug builds keep a copy of the token text for easy inspection
	, contents(sbegin, static_cast<size_t>(send-sbegin))
#endif
{
	ai_assert(sbegin);
	ai_assert(send);

	// binary tokens may have zero length because they are sometimes dummies
	// inserted by TokenizeBinary()
	ai_assert(send >= sbegin);
}
    1.77 +
    1.78 +
    1.79 +namespace {
    1.80 +
    1.81 +// ------------------------------------------------------------------------------------------------
    1.82 +// signal tokenization error, this is always unrecoverable. Throws DeadlyImportError.
    1.83 +void TokenizeError(const std::string& message, unsigned int offset)
    1.84 +{
    1.85 +	throw DeadlyImportError(Util::AddOffset("FBX-Tokenize",message,offset));
    1.86 +}
    1.87 +
    1.88 +
    1.89 +// ------------------------------------------------------------------------------------------------
    1.90 +uint32_t Offset(const char* begin, const char* cursor)
    1.91 +{
    1.92 +	ai_assert(begin <= cursor);
    1.93 +	return static_cast<unsigned int>(cursor - begin);
    1.94 +}
    1.95 +
    1.96 +
    1.97 +// ------------------------------------------------------------------------------------------------
    1.98 +void TokenizeError(const std::string& message, const char* begin, const char* cursor)
    1.99 +{
   1.100 +	TokenizeError(message, Offset(begin, cursor));
   1.101 +}
   1.102 +
   1.103 +
   1.104 +// ------------------------------------------------------------------------------------------------
   1.105 +uint32_t ReadWord(const char* input, const char*& cursor, const char* end)
   1.106 +{
   1.107 +	if(Offset(cursor, end) < 4) {
   1.108 +		TokenizeError("cannot ReadWord, out of bounds",input, cursor);
   1.109 +	} 
   1.110 +
   1.111 +	uint32_t word = *reinterpret_cast<const uint32_t*>(cursor);
   1.112 +	AI_SWAP4(word);
   1.113 +
   1.114 +	cursor += 4;
   1.115 +
   1.116 +	return word;
   1.117 +}
   1.118 +
   1.119 +
   1.120 +// ------------------------------------------------------------------------------------------------
   1.121 +uint8_t ReadByte(const char* input, const char*& cursor, const char* end)
   1.122 +{
   1.123 +	if(Offset(cursor, end) < 1) {
   1.124 +		TokenizeError("cannot ReadByte, out of bounds",input, cursor);
   1.125 +	} 
   1.126 +
   1.127 +	uint8_t word = *reinterpret_cast<const uint8_t*>(cursor);
   1.128 +	++cursor;
   1.129 +
   1.130 +	return word;
   1.131 +}
   1.132 +
   1.133 +
   1.134 +// ------------------------------------------------------------------------------------------------
   1.135 +unsigned int ReadString(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end, 
   1.136 +	bool long_length = false,
   1.137 +	bool allow_null = false)
   1.138 +{
   1.139 +	const uint32_t len_len = long_length ? 4 : 1;
   1.140 +	if(Offset(cursor, end) < len_len) {
   1.141 +		TokenizeError("cannot ReadString, out of bounds reading length",input, cursor);
   1.142 +	} 
   1.143 +
   1.144 +	const uint32_t length = long_length ? ReadWord(input, cursor, end) : ReadByte(input, cursor, end);
   1.145 +
   1.146 +	if (Offset(cursor, end) < length) {
   1.147 +		TokenizeError("cannot ReadString, length is out of bounds",input, cursor);
   1.148 +	}
   1.149 +
   1.150 +	sbegin_out = cursor;
   1.151 +	cursor += length;
   1.152 +
   1.153 +	send_out = cursor;
   1.154 +
   1.155 +	if(!allow_null) {
   1.156 +		for (unsigned int i = 0; i < length; ++i) {
   1.157 +			if(sbegin_out[i] == '\0') {
   1.158 +				TokenizeError("failed ReadString, unexpected NUL character in string",input, cursor);
   1.159 +			}
   1.160 +		}
   1.161 +	}
   1.162 +
   1.163 +	return length;
   1.164 +}
   1.165 +
   1.166 +
   1.167 +
// ------------------------------------------------------------------------------------------------
// Read a single binary property record. On return, [sbegin_out, send_out) spans
// the whole record *including* the leading one-byte type code, and the cursor
// has been advanced past it. Throws DeadlyImportError (via TokenizeError) if the
// record is truncated, inconsistent or carries an unknown type code.
void ReadData(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end)
{
	if(Offset(cursor, end) < 1) {
		TokenizeError("cannot ReadData, out of bounds reading length",input, cursor);
	} 

	// one-byte type code selects how many payload bytes follow
	const char type = *cursor;
	sbegin_out = cursor++;

	switch(type)
	{
		// 16 bit int
	case 'Y':
		cursor += 2;
		break;

		// 1 bit bool flag (yes/no)
	case 'C':
		cursor += 1;
		break;

		// 32 bit int
	case 'I':
		// <- fall thru

		// float
	case 'F':
		cursor += 4;
		break;

		// double
	case 'D':
		cursor += 8;
		break;

		// 64 bit int
	case 'L':
		cursor += 8;
		break;

		// note: do not write cursor += ReadWord(...cursor) as this would be UB
		// (unsequenced read and modification of 'cursor' in one expression)

		// raw binary data: 32 bit length word followed by that many bytes
	case 'R':	
	{
		const uint32_t length = ReadWord(input, cursor, end);
		cursor += length;
		break;
	}

	case 'b': 
		// TODO: what is the 'b' type code? Right now we just skip over it /
		// take the full range we could get
		cursor = end;
		break;

		// array of *: header is element count, encoding flag and stored byte length
	case 'f':
	case 'd':
	case 'l':
	case 'i':	{
	
		const uint32_t length = ReadWord(input, cursor, end);
		const uint32_t encoding = ReadWord(input, cursor, end);

		const uint32_t comp_len = ReadWord(input, cursor, end);

		// compute length based on type and check against the stored value
		if(encoding == 0) {
			uint32_t stride;
			switch(type)
			{
			case 'f':
			case 'i':
				stride = 4;
				break;

			case 'd':
			case 'l':
				stride = 8;
				break;

			default:
				// unreachable: the outer switch admits only f/d/l/i here.
				// NOTE(review): in release builds ai_assert is a no-op and
				// 'stride' would be read uninitialized if this were reached.
				ai_assert(false);
			};
			if(length * stride != comp_len) {
				TokenizeError("cannot ReadData, calculated data stride differs from what the file claims",input, cursor);
			}
		}
		// zip/deflate algorithm (encoding==1)? take given length. anything else? die
		else if (encoding != 1) {			
			TokenizeError("cannot ReadData, unknown encoding",input, cursor);
		}
		// either way, comp_len is the number of payload bytes actually stored
		cursor += comp_len;
		break;
	}

		// string: word-length-prefixed payload
	case 'S': {
		const char* sb, *se;
		// 0 characters can legally happen in such strings
		ReadString(sb, se, input, cursor, end, true, true);
		break;
	}
	default:
		TokenizeError("cannot ReadData, unexpected type code: " + std::string(&type, 1),input, cursor);
	}

	// the fixed-size cases above advance blindly; validate once afterwards
	if(cursor > end) {
		TokenizeError("cannot ReadData, the remaining size is too small for the data type: " + std::string(&type, 1),input, cursor);
	} 

	// the type code is contained in the returned range
	send_out = cursor;
}
   1.284 +
   1.285 +
// ------------------------------------------------------------------------------------------------
// Read one scope record (key, properties and optional nested block) and append
// the corresponding KEY/DATA/COMMA/bracket tokens to output_tokens, recursing
// into nested scopes. Returns false when the zero end-offset of the file footer
// is encountered (i.e. no more scopes), true otherwise. Throws
// DeadlyImportError (via TokenizeError) on any structural inconsistency.
bool ReadScope(TokenList& output_tokens, const char* input, const char*& cursor, const char* end)
{
	// the first word contains the offset at which this block ends
	const uint32_t end_offset = ReadWord(input, cursor, end);
	
	// we may get 0 if reading reached the end of the file -
	// fbx files have a mysterious extra footer which I don't know 
	// how to extract any information from, but at least it always 
	// starts with a 0.
	if(!end_offset) {
		return false;
	}

	// end_offset is absolute (relative to 'input'); sanity-check both directions
	if(end_offset > Offset(input, end)) {
		TokenizeError("block offset is out of range",input, cursor);
	}
	else if(end_offset < Offset(input, cursor)) {
		TokenizeError("block offset is negative out of range",input, cursor);
	}

	// the second data word contains the number of properties in the scope
	const uint32_t prop_count = ReadWord(input, cursor, end);

	// the third data word contains the length of the property list
	const uint32_t prop_length = ReadWord(input, cursor, end);

	// now comes the name of the scope/key
	const char* sbeg, *send;
	ReadString(sbeg, send, input, cursor, end);

	output_tokens.push_back(new_Token(sbeg, send, TokenType_KEY, Offset(input, cursor) ));

	// now come the individual properties; they must occupy exactly
	// prop_length bytes starting at begin_cursor (verified below)
	const char* begin_cursor = cursor;
	for (unsigned int i = 0; i < prop_count; ++i) {
		ReadData(sbeg, send, input, cursor, begin_cursor + prop_length);

		output_tokens.push_back(new_Token(sbeg, send, TokenType_DATA, Offset(input, cursor) ));

		// synthesize comma tokens between properties so the text and binary
		// parsers can share a grammar
		if(i != prop_count-1) {
			output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_COMMA, Offset(input, cursor) ));
		}
	}

	if (Offset(begin_cursor, cursor) != prop_length) {
		TokenizeError("property length not reached, something is wrong",input, cursor);
	}

	// at the end of each nested block, there is a NUL record to indicate
	// that the sub-scope exists (i.e. to distinguish between P: and P : {})
	// this NUL record is 13 bytes long.
	// NOTE(review): function-scope #define leaks past this function; harmless
	// here but a constant would be cleaner.
#define BLOCK_SENTINEL_LENGTH 13

	// bytes remaining before end_offset mean a nested scope follows
	if (Offset(input, cursor) < end_offset) {

		if (end_offset - Offset(input, cursor) < BLOCK_SENTINEL_LENGTH) {
			TokenizeError("insufficient padding bytes at block end",input, cursor);
		}

		// brackets are synthesized, again to match the text grammar
		output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_OPEN_BRACKET, Offset(input, cursor) ));

		// XXX this is vulnerable to stack overflowing ..
		// (recursion depth is controlled by the input file)
		while(Offset(input, cursor) < end_offset - BLOCK_SENTINEL_LENGTH) {
			ReadScope(output_tokens, input, cursor, input + end_offset - BLOCK_SENTINEL_LENGTH);
		}
		output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_CLOSE_BRACKET, Offset(input, cursor) ));

		// the sentinel itself must be all zero bytes
		for (unsigned int i = 0; i < BLOCK_SENTINEL_LENGTH; ++i) {
			if(cursor[i] != '\0') {
				TokenizeError("failed to read nested block sentinel, expected all bytes to be 0",input, cursor);
			}
		}
		cursor += BLOCK_SENTINEL_LENGTH;
	}

	if (Offset(input, cursor) != end_offset) {
		TokenizeError("scope length not reached, something is wrong",input, cursor);
	}

	return true;
}
   1.368 +
   1.369 +
   1.370 +}
   1.371 +
   1.372 +// ------------------------------------------------------------------------------------------------
   1.373 +void TokenizeBinary(TokenList& output_tokens, const char* input, unsigned int length)
   1.374 +{
   1.375 +	ai_assert(input);
   1.376 +
   1.377 +	if(length < 0x1b) {
   1.378 +		TokenizeError("file is too short",0);
   1.379 +	}
   1.380 +
   1.381 +	if (strncmp(input,"Kaydara FBX Binary",18)) {
   1.382 +		TokenizeError("magic bytes not found",0);
   1.383 +	}
   1.384 +
   1.385 +
   1.386 +	//uint32_t offset = 0x1b;
   1.387 +
   1.388 +	const char* cursor = input + 0x1b;
   1.389 +
   1.390 +	while (cursor < input + length) {
   1.391 +		if(!ReadScope(output_tokens, input, cursor, input + length)) {
   1.392 +			break;
   1.393 +		}
   1.394 +	}
   1.395 +}
   1.396 +
   1.397 +} // !FBX
   1.398 +} // !Assimp
   1.399 +
   1.400 +#endif
   1.401 \ No newline at end of file