vrshoot
diff libs/assimp/FBXBinaryTokenizer.cpp @ 0:b2f14e535253
initial commit
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Sat, 01 Feb 2014 19:58:19 +0200 |
parents | |
children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/libs/assimp/FBXBinaryTokenizer.cpp Sat Feb 01 19:58:19 2014 +0200 1.3 @@ -0,0 +1,397 @@ 1.4 +/* 1.5 +Open Asset Import Library (assimp) 1.6 +---------------------------------------------------------------------- 1.7 + 1.8 +Copyright (c) 2006-2012, assimp team 1.9 +All rights reserved. 1.10 + 1.11 +Redistribution and use of this software in source and binary forms, 1.12 +with or without modification, are permitted provided that the 1.13 +following conditions are met: 1.14 + 1.15 +* Redistributions of source code must retain the above 1.16 + copyright notice, this list of conditions and the 1.17 + following disclaimer. 1.18 + 1.19 +* Redistributions in binary form must reproduce the above 1.20 + copyright notice, this list of conditions and the 1.21 + following disclaimer in the documentation and/or other 1.22 + materials provided with the distribution. 1.23 + 1.24 +* Neither the name of the assimp team, nor the names of its 1.25 + contributors may be used to endorse or promote products 1.26 + derived from this software without specific prior 1.27 + written permission of the assimp team. 1.28 + 1.29 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 1.30 +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 1.31 +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 1.32 +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 1.33 +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 1.34 +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 1.35 +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 1.36 +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 1.37 +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 1.38 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 1.39 +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1.40 + 1.41 +---------------------------------------------------------------------- 1.42 +*/ 1.43 +/** @file FBXBinaryTokenizer.cpp 1.44 + * @brief Implementation of a fake lexer for binary fbx files - 1.45 + * we emit tokens so the parser needs almost no special handling 1.46 + * for binary files. 1.47 + */ 1.48 +#include "AssimpPCH.h" 1.49 + 1.50 +#ifndef ASSIMP_BUILD_NO_FBX_IMPORTER 1.51 + 1.52 +#include "FBXTokenizer.h" 1.53 +#include "FBXUtil.h" 1.54 + 1.55 +namespace Assimp { 1.56 +namespace FBX { 1.57 + 1.58 + 1.59 +// ------------------------------------------------------------------------------------------------ 1.60 +Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int offset) 1.61 + : sbegin(sbegin) 1.62 + , send(send) 1.63 + , type(type) 1.64 + , line(offset) 1.65 + , column(BINARY_MARKER) 1.66 +#ifdef DEBUG 1.67 + , contents(sbegin, static_cast<size_t>(send-sbegin)) 1.68 +#endif 1.69 +{ 1.70 + ai_assert(sbegin); 1.71 + ai_assert(send); 1.72 + 1.73 + // binary tokens may have zero length because they are sometimes dummies 1.74 + // inserted by TokenizeBinary() 1.75 + ai_assert(send >= sbegin); 1.76 +} 1.77 + 1.78 + 1.79 +namespace { 1.80 + 1.81 +// ------------------------------------------------------------------------------------------------ 1.82 +// signal tokenization error, this is always unrecoverable. Throws DeadlyImportError. 1.83 +void TokenizeError(const std::string& message, unsigned int offset) 1.84 +{ 1.85 + throw DeadlyImportError(Util::AddOffset("FBX-Tokenize",message,offset)); 1.86 +} 1.87 + 1.88 + 1.89 +// ------------------------------------------------------------------------------------------------ 1.90 +uint32_t Offset(const char* begin, const char* cursor) 1.91 +{ 1.92 + ai_assert(begin <= cursor); 1.93 + return static_cast<unsigned int>(cursor - begin); 1.94 +} 1.95 + 1.96 + 1.97 +// ------------------------------------------------------------------------------------------------ 1.98 +void TokenizeError(const std::string& message, const char* begin, const char* cursor) 1.99 +{ 1.100 + TokenizeError(message, Offset(begin, cursor)); 1.101 +} 1.102 + 1.103 + 1.104 +// ------------------------------------------------------------------------------------------------ 1.105 +uint32_t ReadWord(const char* input, const char*& cursor, const char* end) 1.106 +{ 1.107 + if(Offset(cursor, end) < 4) { 1.108 + TokenizeError("cannot ReadWord, out of bounds",input, cursor); 1.109 + } 1.110 + 1.111 + uint32_t word = *reinterpret_cast<const uint32_t*>(cursor); 1.112 + AI_SWAP4(word); 1.113 + 1.114 + cursor += 4; 1.115 + 1.116 + return word; 1.117 +} 1.118 + 1.119 + 1.120 +// ------------------------------------------------------------------------------------------------ 1.121 +uint8_t ReadByte(const char* input, const char*& cursor, const char* end) 1.122 +{ 1.123 + if(Offset(cursor, end) < 1) { 1.124 + TokenizeError("cannot ReadByte, out of bounds",input, cursor); 1.125 + } 1.126 + 1.127 + uint8_t word = *reinterpret_cast<const uint8_t*>(cursor); 1.128 + ++cursor; 1.129 + 1.130 + return word; 1.131 +} 1.132 + 1.133 + 1.134 +// ------------------------------------------------------------------------------------------------ 1.135 +unsigned int ReadString(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end, 1.136 + bool long_length = false, 1.137 + bool allow_null = false) 1.138 +{ 1.139 + const uint32_t len_len = long_length ? 4 : 1; 1.140 + if(Offset(cursor, end) < len_len) { 1.141 + TokenizeError("cannot ReadString, out of bounds reading length",input, cursor); 1.142 + } 1.143 + 1.144 + const uint32_t length = long_length ? ReadWord(input, cursor, end) : ReadByte(input, cursor, end); 1.145 + 1.146 + if (Offset(cursor, end) < length) { 1.147 + TokenizeError("cannot ReadString, length is out of bounds",input, cursor); 1.148 + } 1.149 + 1.150 + sbegin_out = cursor; 1.151 + cursor += length; 1.152 + 1.153 + send_out = cursor; 1.154 + 1.155 + if(!allow_null) { 1.156 + for (unsigned int i = 0; i < length; ++i) { 1.157 + if(sbegin_out[i] == '\0') { 1.158 + TokenizeError("failed ReadString, unexpected NUL character in string",input, cursor); 1.159 + } 1.160 + } 1.161 + } 1.162 + 1.163 + return length; 1.164 +} 1.165 + 1.166 + 1.167 + 1.168 +// ------------------------------------------------------------------------------------------------ 1.169 +void ReadData(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end) 1.170 +{ 1.171 + if(Offset(cursor, end) < 1) { 1.172 + TokenizeError("cannot ReadData, out of bounds reading length",input, cursor); 1.173 + } 1.174 + 1.175 + const char type = *cursor; 1.176 + sbegin_out = cursor++; 1.177 + 1.178 + switch(type) 1.179 + { 1.180 + // 16 bit int 1.181 + case 'Y': 1.182 + cursor += 2; 1.183 + break; 1.184 + 1.185 + // 1 bit bool flag (yes/no) 1.186 + case 'C': 1.187 + cursor += 1; 1.188 + break; 1.189 + 1.190 + // 32 bit int 1.191 + case 'I': 1.192 + // <- fall thru 1.193 + 1.194 + // float 1.195 + case 'F': 1.196 + cursor += 4; 1.197 + break; 1.198 + 1.199 + // double 1.200 + case 'D': 1.201 + cursor += 8; 1.202 + break; 1.203 + 1.204 + // 64 bit int 1.205 + case 'L': 1.206 + cursor += 8; 1.207 + break; 1.208 + 1.209 + // note: do not write cursor += ReadWord(...cursor) as this would be UB 1.210 + 1.211 + // raw binary data 1.212 + case 'R': 1.213 + { 1.214 + const uint32_t length = ReadWord(input, cursor, end); 1.215 + cursor += length; 1.216 + break; 1.217 + } 1.218 + 1.219 + case 'b': 1.220 + // TODO: what is the 'b' type code? Right now we just skip over it / 1.221 + // take the full range we could get 1.222 + cursor = end; 1.223 + break; 1.224 + 1.225 + // array of * 1.226 + case 'f': 1.227 + case 'd': 1.228 + case 'l': 1.229 + case 'i': { 1.230 + 1.231 + const uint32_t length = ReadWord(input, cursor, end); 1.232 + const uint32_t encoding = ReadWord(input, cursor, end); 1.233 + 1.234 + const uint32_t comp_len = ReadWord(input, cursor, end); 1.235 + 1.236 + // compute length based on type and check against the stored value 1.237 + if(encoding == 0) { 1.238 + uint32_t stride; 1.239 + switch(type) 1.240 + { 1.241 + case 'f': 1.242 + case 'i': 1.243 + stride = 4; 1.244 + break; 1.245 + 1.246 + case 'd': 1.247 + case 'l': 1.248 + stride = 8; 1.249 + break; 1.250 + 1.251 + default: 1.252 + ai_assert(false); 1.253 + }; 1.254 + if(length * stride != comp_len) { 1.255 + TokenizeError("cannot ReadData, calculated data stride differs from what the file claims",input, cursor); 1.256 + } 1.257 + } 1.258 + // zip/deflate algorithm (encoding==1)? take given length. anything else? die 1.259 + else if (encoding != 1) { 1.260 + TokenizeError("cannot ReadData, unknown encoding",input, cursor); 1.261 + } 1.262 + cursor += comp_len; 1.263 + break; 1.264 + } 1.265 + 1.266 + // string 1.267 + case 'S': { 1.268 + const char* sb, *se; 1.269 + // 0 characters can legally happen in such strings 1.270 + ReadString(sb, se, input, cursor, end, true, true); 1.271 + break; 1.272 + } 1.273 + default: 1.274 + TokenizeError("cannot ReadData, unexpected type code: " + std::string(&type, 1),input, cursor); 1.275 + } 1.276 + 1.277 + if(cursor > end) { 1.278 + TokenizeError("cannot ReadData, the remaining size is too small for the data type: " + std::string(&type, 1),input, cursor); 1.279 + } 1.280 + 1.281 + // the type code is contained in the returned range 1.282 + send_out = cursor; 1.283 +} 1.284 + 1.285 + 1.286 +// ------------------------------------------------------------------------------------------------ 1.287 +bool ReadScope(TokenList& output_tokens, const char* input, const char*& cursor, const char* end) 1.288 +{ 1.289 + // the first word contains the offset at which this block ends 1.290 + const uint32_t end_offset = ReadWord(input, cursor, end); 1.291 + 1.292 + // we may get 0 if reading reached the end of the file - 1.293 + // fbx files have a mysterious extra footer which I don't know 1.294 + // how to extract any information from, but at least it always 1.295 + // starts with a 0. 1.296 + if(!end_offset) { 1.297 + return false; 1.298 + } 1.299 + 1.300 + if(end_offset > Offset(input, end)) { 1.301 + TokenizeError("block offset is out of range",input, cursor); 1.302 + } 1.303 + else if(end_offset < Offset(input, cursor)) { 1.304 + TokenizeError("block offset is negative out of range",input, cursor); 1.305 + } 1.306 + 1.307 + // the second data word contains the number of properties in the scope 1.308 + const uint32_t prop_count = ReadWord(input, cursor, end); 1.309 + 1.310 + // the third data word contains the length of the property list 1.311 + const uint32_t prop_length = ReadWord(input, cursor, end); 1.312 + 1.313 + // now comes the name of the scope/key 1.314 + const char* sbeg, *send; 1.315 + ReadString(sbeg, send, input, cursor, end); 1.316 + 1.317 + output_tokens.push_back(new_Token(sbeg, send, TokenType_KEY, Offset(input, cursor) )); 1.318 + 1.319 + // now come the individual properties 1.320 + const char* begin_cursor = cursor; 1.321 + for (unsigned int i = 0; i < prop_count; ++i) { 1.322 + ReadData(sbeg, send, input, cursor, begin_cursor + prop_length); 1.323 + 1.324 + output_tokens.push_back(new_Token(sbeg, send, TokenType_DATA, Offset(input, cursor) )); 1.325 + 1.326 + if(i != prop_count-1) { 1.327 + output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_COMMA, Offset(input, cursor) )); 1.328 + } 1.329 + } 1.330 + 1.331 + if (Offset(begin_cursor, cursor) != prop_length) { 1.332 + TokenizeError("property length not reached, something is wrong",input, cursor); 1.333 + } 1.334 + 1.335 + // at the end of each nested block, there is a NUL record to indicate 1.336 + // that the sub-scope exists (i.e. to distinguish between P: and P : {}) 1.337 + // this NUL record is 13 bytes long. 1.338 +#define BLOCK_SENTINEL_LENGTH 13 1.339 + 1.340 + if (Offset(input, cursor) < end_offset) { 1.341 + 1.342 + if (end_offset - Offset(input, cursor) < BLOCK_SENTINEL_LENGTH) { 1.343 + TokenizeError("insufficient padding bytes at block end",input, cursor); 1.344 + } 1.345 + 1.346 + output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_OPEN_BRACKET, Offset(input, cursor) )); 1.347 + 1.348 + // XXX this is vulnerable to stack overflowing .. 1.349 + while(Offset(input, cursor) < end_offset - BLOCK_SENTINEL_LENGTH) { 1.350 + ReadScope(output_tokens, input, cursor, input + end_offset - BLOCK_SENTINEL_LENGTH); 1.351 + } 1.352 + output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_CLOSE_BRACKET, Offset(input, cursor) )); 1.353 + 1.354 + for (unsigned int i = 0; i < BLOCK_SENTINEL_LENGTH; ++i) { 1.355 + if(cursor[i] != '\0') { 1.356 + TokenizeError("failed to read nested block sentinel, expected all bytes to be 0",input, cursor); 1.357 + } 1.358 + } 1.359 + cursor += BLOCK_SENTINEL_LENGTH; 1.360 + } 1.361 + 1.362 + if (Offset(input, cursor) != end_offset) { 1.363 + TokenizeError("scope length not reached, something is wrong",input, cursor); 1.364 + } 1.365 + 1.366 + return true; 1.367 +} 1.368 + 1.369 + 1.370 +} 1.371 + 1.372 +// ------------------------------------------------------------------------------------------------ 1.373 +void TokenizeBinary(TokenList& output_tokens, const char* input, unsigned int length) 1.374 +{ 1.375 + ai_assert(input); 1.376 + 1.377 + if(length < 0x1b) { 1.378 + TokenizeError("file is too short",0); 1.379 + } 1.380 + 1.381 + if (strncmp(input,"Kaydara FBX Binary",18)) { 1.382 + TokenizeError("magic bytes not found",0); 1.383 + } 1.384 + 1.385 + 1.386 + //uint32_t offset = 0x1b; 1.387 + 1.388 + const char* cursor = input + 0x1b; 1.389 + 1.390 + while (cursor < input + length) { 1.391 + if(!ReadScope(output_tokens, input, cursor, input + length)) { 1.392 + break; 1.393 + } 1.394 + } 1.395 +} 1.396 + 1.397 +} // !FBX 1.398 +} // !Assimp 1.399 + 1.400 +#endif 1.401 \ No newline at end of file