vrshoot

annotate libs/assimp/FBXBinaryTokenizer.cpp @ 1:e7ca128b8713

looks nice :)
author John Tsiombikas <nuclear@member.fsf.org>
date Sun, 02 Feb 2014 00:35:22 +0200
parents
children
rev   line source
nuclear@0 1 /*
nuclear@0 2 Open Asset Import Library (assimp)
nuclear@0 3 ----------------------------------------------------------------------
nuclear@0 4
nuclear@0 5 Copyright (c) 2006-2012, assimp team
nuclear@0 6 All rights reserved.
nuclear@0 7
nuclear@0 8 Redistribution and use of this software in source and binary forms,
nuclear@0 9 with or without modification, are permitted provided that the
nuclear@0 10 following conditions are met:
nuclear@0 11
nuclear@0 12 * Redistributions of source code must retain the above
nuclear@0 13 copyright notice, this list of conditions and the
nuclear@0 14 following disclaimer.
nuclear@0 15
nuclear@0 16 * Redistributions in binary form must reproduce the above
nuclear@0 17 copyright notice, this list of conditions and the
nuclear@0 18 following disclaimer in the documentation and/or other
nuclear@0 19 materials provided with the distribution.
nuclear@0 20
nuclear@0 21 * Neither the name of the assimp team, nor the names of its
nuclear@0 22 contributors may be used to endorse or promote products
nuclear@0 23 derived from this software without specific prior
nuclear@0 24 written permission of the assimp team.
nuclear@0 25
nuclear@0 26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
nuclear@0 27 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
nuclear@0 28 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
nuclear@0 29 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
nuclear@0 30 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
nuclear@0 31 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
nuclear@0 32 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
nuclear@0 33 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
nuclear@0 34 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
nuclear@0 35 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
nuclear@0 36 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
nuclear@0 37
nuclear@0 38 ----------------------------------------------------------------------
nuclear@0 39 */
nuclear@0 40 /** @file FBXBinaryTokenizer.cpp
nuclear@0 41 * @brief Implementation of a fake lexer for binary fbx files -
nuclear@0 42 * we emit tokens so the parser needs almost no special handling
nuclear@0 43 * for binary files.
nuclear@0 44 */
nuclear@0 45 #include "AssimpPCH.h"
nuclear@0 46
nuclear@0 47 #ifndef ASSIMP_BUILD_NO_FBX_IMPORTER
nuclear@0 48
#include "FBXTokenizer.h"
#include "FBXUtil.h"

#include <cstring>
nuclear@0 51
nuclear@0 52 namespace Assimp {
nuclear@0 53 namespace FBX {
nuclear@0 54
nuclear@0 55
nuclear@0 56 // ------------------------------------------------------------------------------------------------
nuclear@0 57 Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int offset)
nuclear@0 58 : sbegin(sbegin)
nuclear@0 59 , send(send)
nuclear@0 60 , type(type)
nuclear@0 61 , line(offset)
nuclear@0 62 , column(BINARY_MARKER)
nuclear@0 63 #ifdef DEBUG
nuclear@0 64 , contents(sbegin, static_cast<size_t>(send-sbegin))
nuclear@0 65 #endif
nuclear@0 66 {
nuclear@0 67 ai_assert(sbegin);
nuclear@0 68 ai_assert(send);
nuclear@0 69
nuclear@0 70 // binary tokens may have zero length because they are sometimes dummies
nuclear@0 71 // inserted by TokenizeBinary()
nuclear@0 72 ai_assert(send >= sbegin);
nuclear@0 73 }
nuclear@0 74
nuclear@0 75
nuclear@0 76 namespace {
nuclear@0 77
nuclear@0 78 // ------------------------------------------------------------------------------------------------
nuclear@0 79 // signal tokenization error, this is always unrecoverable. Throws DeadlyImportError.
nuclear@0 80 void TokenizeError(const std::string& message, unsigned int offset)
nuclear@0 81 {
nuclear@0 82 throw DeadlyImportError(Util::AddOffset("FBX-Tokenize",message,offset));
nuclear@0 83 }
nuclear@0 84
nuclear@0 85
nuclear@0 86 // ------------------------------------------------------------------------------------------------
nuclear@0 87 uint32_t Offset(const char* begin, const char* cursor)
nuclear@0 88 {
nuclear@0 89 ai_assert(begin <= cursor);
nuclear@0 90 return static_cast<unsigned int>(cursor - begin);
nuclear@0 91 }
nuclear@0 92
nuclear@0 93
nuclear@0 94 // ------------------------------------------------------------------------------------------------
nuclear@0 95 void TokenizeError(const std::string& message, const char* begin, const char* cursor)
nuclear@0 96 {
nuclear@0 97 TokenizeError(message, Offset(begin, cursor));
nuclear@0 98 }
nuclear@0 99
nuclear@0 100
nuclear@0 101 // ------------------------------------------------------------------------------------------------
nuclear@0 102 uint32_t ReadWord(const char* input, const char*& cursor, const char* end)
nuclear@0 103 {
nuclear@0 104 if(Offset(cursor, end) < 4) {
nuclear@0 105 TokenizeError("cannot ReadWord, out of bounds",input, cursor);
nuclear@0 106 }
nuclear@0 107
nuclear@0 108 uint32_t word = *reinterpret_cast<const uint32_t*>(cursor);
nuclear@0 109 AI_SWAP4(word);
nuclear@0 110
nuclear@0 111 cursor += 4;
nuclear@0 112
nuclear@0 113 return word;
nuclear@0 114 }
nuclear@0 115
nuclear@0 116
nuclear@0 117 // ------------------------------------------------------------------------------------------------
nuclear@0 118 uint8_t ReadByte(const char* input, const char*& cursor, const char* end)
nuclear@0 119 {
nuclear@0 120 if(Offset(cursor, end) < 1) {
nuclear@0 121 TokenizeError("cannot ReadByte, out of bounds",input, cursor);
nuclear@0 122 }
nuclear@0 123
nuclear@0 124 uint8_t word = *reinterpret_cast<const uint8_t*>(cursor);
nuclear@0 125 ++cursor;
nuclear@0 126
nuclear@0 127 return word;
nuclear@0 128 }
nuclear@0 129
nuclear@0 130
nuclear@0 131 // ------------------------------------------------------------------------------------------------
nuclear@0 132 unsigned int ReadString(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end,
nuclear@0 133 bool long_length = false,
nuclear@0 134 bool allow_null = false)
nuclear@0 135 {
nuclear@0 136 const uint32_t len_len = long_length ? 4 : 1;
nuclear@0 137 if(Offset(cursor, end) < len_len) {
nuclear@0 138 TokenizeError("cannot ReadString, out of bounds reading length",input, cursor);
nuclear@0 139 }
nuclear@0 140
nuclear@0 141 const uint32_t length = long_length ? ReadWord(input, cursor, end) : ReadByte(input, cursor, end);
nuclear@0 142
nuclear@0 143 if (Offset(cursor, end) < length) {
nuclear@0 144 TokenizeError("cannot ReadString, length is out of bounds",input, cursor);
nuclear@0 145 }
nuclear@0 146
nuclear@0 147 sbegin_out = cursor;
nuclear@0 148 cursor += length;
nuclear@0 149
nuclear@0 150 send_out = cursor;
nuclear@0 151
nuclear@0 152 if(!allow_null) {
nuclear@0 153 for (unsigned int i = 0; i < length; ++i) {
nuclear@0 154 if(sbegin_out[i] == '\0') {
nuclear@0 155 TokenizeError("failed ReadString, unexpected NUL character in string",input, cursor);
nuclear@0 156 }
nuclear@0 157 }
nuclear@0 158 }
nuclear@0 159
nuclear@0 160 return length;
nuclear@0 161 }
nuclear@0 162
nuclear@0 163
nuclear@0 164
nuclear@0 165 // ------------------------------------------------------------------------------------------------
nuclear@0 166 void ReadData(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end)
nuclear@0 167 {
nuclear@0 168 if(Offset(cursor, end) < 1) {
nuclear@0 169 TokenizeError("cannot ReadData, out of bounds reading length",input, cursor);
nuclear@0 170 }
nuclear@0 171
nuclear@0 172 const char type = *cursor;
nuclear@0 173 sbegin_out = cursor++;
nuclear@0 174
nuclear@0 175 switch(type)
nuclear@0 176 {
nuclear@0 177 // 16 bit int
nuclear@0 178 case 'Y':
nuclear@0 179 cursor += 2;
nuclear@0 180 break;
nuclear@0 181
nuclear@0 182 // 1 bit bool flag (yes/no)
nuclear@0 183 case 'C':
nuclear@0 184 cursor += 1;
nuclear@0 185 break;
nuclear@0 186
nuclear@0 187 // 32 bit int
nuclear@0 188 case 'I':
nuclear@0 189 // <- fall thru
nuclear@0 190
nuclear@0 191 // float
nuclear@0 192 case 'F':
nuclear@0 193 cursor += 4;
nuclear@0 194 break;
nuclear@0 195
nuclear@0 196 // double
nuclear@0 197 case 'D':
nuclear@0 198 cursor += 8;
nuclear@0 199 break;
nuclear@0 200
nuclear@0 201 // 64 bit int
nuclear@0 202 case 'L':
nuclear@0 203 cursor += 8;
nuclear@0 204 break;
nuclear@0 205
nuclear@0 206 // note: do not write cursor += ReadWord(...cursor) as this would be UB
nuclear@0 207
nuclear@0 208 // raw binary data
nuclear@0 209 case 'R':
nuclear@0 210 {
nuclear@0 211 const uint32_t length = ReadWord(input, cursor, end);
nuclear@0 212 cursor += length;
nuclear@0 213 break;
nuclear@0 214 }
nuclear@0 215
nuclear@0 216 case 'b':
nuclear@0 217 // TODO: what is the 'b' type code? Right now we just skip over it /
nuclear@0 218 // take the full range we could get
nuclear@0 219 cursor = end;
nuclear@0 220 break;
nuclear@0 221
nuclear@0 222 // array of *
nuclear@0 223 case 'f':
nuclear@0 224 case 'd':
nuclear@0 225 case 'l':
nuclear@0 226 case 'i': {
nuclear@0 227
nuclear@0 228 const uint32_t length = ReadWord(input, cursor, end);
nuclear@0 229 const uint32_t encoding = ReadWord(input, cursor, end);
nuclear@0 230
nuclear@0 231 const uint32_t comp_len = ReadWord(input, cursor, end);
nuclear@0 232
nuclear@0 233 // compute length based on type and check against the stored value
nuclear@0 234 if(encoding == 0) {
nuclear@0 235 uint32_t stride;
nuclear@0 236 switch(type)
nuclear@0 237 {
nuclear@0 238 case 'f':
nuclear@0 239 case 'i':
nuclear@0 240 stride = 4;
nuclear@0 241 break;
nuclear@0 242
nuclear@0 243 case 'd':
nuclear@0 244 case 'l':
nuclear@0 245 stride = 8;
nuclear@0 246 break;
nuclear@0 247
nuclear@0 248 default:
nuclear@0 249 ai_assert(false);
nuclear@0 250 };
nuclear@0 251 if(length * stride != comp_len) {
nuclear@0 252 TokenizeError("cannot ReadData, calculated data stride differs from what the file claims",input, cursor);
nuclear@0 253 }
nuclear@0 254 }
nuclear@0 255 // zip/deflate algorithm (encoding==1)? take given length. anything else? die
nuclear@0 256 else if (encoding != 1) {
nuclear@0 257 TokenizeError("cannot ReadData, unknown encoding",input, cursor);
nuclear@0 258 }
nuclear@0 259 cursor += comp_len;
nuclear@0 260 break;
nuclear@0 261 }
nuclear@0 262
nuclear@0 263 // string
nuclear@0 264 case 'S': {
nuclear@0 265 const char* sb, *se;
nuclear@0 266 // 0 characters can legally happen in such strings
nuclear@0 267 ReadString(sb, se, input, cursor, end, true, true);
nuclear@0 268 break;
nuclear@0 269 }
nuclear@0 270 default:
nuclear@0 271 TokenizeError("cannot ReadData, unexpected type code: " + std::string(&type, 1),input, cursor);
nuclear@0 272 }
nuclear@0 273
nuclear@0 274 if(cursor > end) {
nuclear@0 275 TokenizeError("cannot ReadData, the remaining size is too small for the data type: " + std::string(&type, 1),input, cursor);
nuclear@0 276 }
nuclear@0 277
nuclear@0 278 // the type code is contained in the returned range
nuclear@0 279 send_out = cursor;
nuclear@0 280 }
nuclear@0 281
nuclear@0 282
nuclear@0 283 // ------------------------------------------------------------------------------------------------
nuclear@0 284 bool ReadScope(TokenList& output_tokens, const char* input, const char*& cursor, const char* end)
nuclear@0 285 {
nuclear@0 286 // the first word contains the offset at which this block ends
nuclear@0 287 const uint32_t end_offset = ReadWord(input, cursor, end);
nuclear@0 288
nuclear@0 289 // we may get 0 if reading reached the end of the file -
nuclear@0 290 // fbx files have a mysterious extra footer which I don't know
nuclear@0 291 // how to extract any information from, but at least it always
nuclear@0 292 // starts with a 0.
nuclear@0 293 if(!end_offset) {
nuclear@0 294 return false;
nuclear@0 295 }
nuclear@0 296
nuclear@0 297 if(end_offset > Offset(input, end)) {
nuclear@0 298 TokenizeError("block offset is out of range",input, cursor);
nuclear@0 299 }
nuclear@0 300 else if(end_offset < Offset(input, cursor)) {
nuclear@0 301 TokenizeError("block offset is negative out of range",input, cursor);
nuclear@0 302 }
nuclear@0 303
nuclear@0 304 // the second data word contains the number of properties in the scope
nuclear@0 305 const uint32_t prop_count = ReadWord(input, cursor, end);
nuclear@0 306
nuclear@0 307 // the third data word contains the length of the property list
nuclear@0 308 const uint32_t prop_length = ReadWord(input, cursor, end);
nuclear@0 309
nuclear@0 310 // now comes the name of the scope/key
nuclear@0 311 const char* sbeg, *send;
nuclear@0 312 ReadString(sbeg, send, input, cursor, end);
nuclear@0 313
nuclear@0 314 output_tokens.push_back(new_Token(sbeg, send, TokenType_KEY, Offset(input, cursor) ));
nuclear@0 315
nuclear@0 316 // now come the individual properties
nuclear@0 317 const char* begin_cursor = cursor;
nuclear@0 318 for (unsigned int i = 0; i < prop_count; ++i) {
nuclear@0 319 ReadData(sbeg, send, input, cursor, begin_cursor + prop_length);
nuclear@0 320
nuclear@0 321 output_tokens.push_back(new_Token(sbeg, send, TokenType_DATA, Offset(input, cursor) ));
nuclear@0 322
nuclear@0 323 if(i != prop_count-1) {
nuclear@0 324 output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_COMMA, Offset(input, cursor) ));
nuclear@0 325 }
nuclear@0 326 }
nuclear@0 327
nuclear@0 328 if (Offset(begin_cursor, cursor) != prop_length) {
nuclear@0 329 TokenizeError("property length not reached, something is wrong",input, cursor);
nuclear@0 330 }
nuclear@0 331
nuclear@0 332 // at the end of each nested block, there is a NUL record to indicate
nuclear@0 333 // that the sub-scope exists (i.e. to distinguish between P: and P : {})
nuclear@0 334 // this NUL record is 13 bytes long.
nuclear@0 335 #define BLOCK_SENTINEL_LENGTH 13
nuclear@0 336
nuclear@0 337 if (Offset(input, cursor) < end_offset) {
nuclear@0 338
nuclear@0 339 if (end_offset - Offset(input, cursor) < BLOCK_SENTINEL_LENGTH) {
nuclear@0 340 TokenizeError("insufficient padding bytes at block end",input, cursor);
nuclear@0 341 }
nuclear@0 342
nuclear@0 343 output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_OPEN_BRACKET, Offset(input, cursor) ));
nuclear@0 344
nuclear@0 345 // XXX this is vulnerable to stack overflowing ..
nuclear@0 346 while(Offset(input, cursor) < end_offset - BLOCK_SENTINEL_LENGTH) {
nuclear@0 347 ReadScope(output_tokens, input, cursor, input + end_offset - BLOCK_SENTINEL_LENGTH);
nuclear@0 348 }
nuclear@0 349 output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_CLOSE_BRACKET, Offset(input, cursor) ));
nuclear@0 350
nuclear@0 351 for (unsigned int i = 0; i < BLOCK_SENTINEL_LENGTH; ++i) {
nuclear@0 352 if(cursor[i] != '\0') {
nuclear@0 353 TokenizeError("failed to read nested block sentinel, expected all bytes to be 0",input, cursor);
nuclear@0 354 }
nuclear@0 355 }
nuclear@0 356 cursor += BLOCK_SENTINEL_LENGTH;
nuclear@0 357 }
nuclear@0 358
nuclear@0 359 if (Offset(input, cursor) != end_offset) {
nuclear@0 360 TokenizeError("scope length not reached, something is wrong",input, cursor);
nuclear@0 361 }
nuclear@0 362
nuclear@0 363 return true;
nuclear@0 364 }
nuclear@0 365
nuclear@0 366
nuclear@0 367 }
nuclear@0 368
nuclear@0 369 // ------------------------------------------------------------------------------------------------
nuclear@0 370 void TokenizeBinary(TokenList& output_tokens, const char* input, unsigned int length)
nuclear@0 371 {
nuclear@0 372 ai_assert(input);
nuclear@0 373
nuclear@0 374 if(length < 0x1b) {
nuclear@0 375 TokenizeError("file is too short",0);
nuclear@0 376 }
nuclear@0 377
nuclear@0 378 if (strncmp(input,"Kaydara FBX Binary",18)) {
nuclear@0 379 TokenizeError("magic bytes not found",0);
nuclear@0 380 }
nuclear@0 381
nuclear@0 382
nuclear@0 383 //uint32_t offset = 0x1b;
nuclear@0 384
nuclear@0 385 const char* cursor = input + 0x1b;
nuclear@0 386
nuclear@0 387 while (cursor < input + length) {
nuclear@0 388 if(!ReadScope(output_tokens, input, cursor, input + length)) {
nuclear@0 389 break;
nuclear@0 390 }
nuclear@0 391 }
nuclear@0 392 }
nuclear@0 393
nuclear@0 394 } // !FBX
nuclear@0 395 } // !Assimp
nuclear@0 396
nuclear@0 397 #endif