nuclear@0: /* nuclear@0: Open Asset Import Library (assimp) nuclear@0: ---------------------------------------------------------------------- nuclear@0: nuclear@0: Copyright (c) 2006-2012, assimp team nuclear@0: All rights reserved. nuclear@0: nuclear@0: Redistribution and use of this software in source and binary forms, nuclear@0: with or without modification, are permitted provided that the nuclear@0: following conditions are met: nuclear@0: nuclear@0: * Redistributions of source code must retain the above nuclear@0: copyright notice, this list of conditions and the nuclear@0: following disclaimer. nuclear@0: nuclear@0: * Redistributions in binary form must reproduce the above nuclear@0: copyright notice, this list of conditions and the nuclear@0: following disclaimer in the documentation and/or other nuclear@0: materials provided with the distribution. nuclear@0: nuclear@0: * Neither the name of the assimp team, nor the names of its nuclear@0: contributors may be used to endorse or promote products nuclear@0: derived from this software without specific prior nuclear@0: written permission of the assimp team. nuclear@0: nuclear@0: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS nuclear@0: "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT nuclear@0: LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR nuclear@0: A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT nuclear@0: OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, nuclear@0: SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT nuclear@0: LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, nuclear@0: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY nuclear@0: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT nuclear@0: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE nuclear@0: OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. nuclear@0: nuclear@0: ---------------------------------------------------------------------- nuclear@0: */ nuclear@0: nuclear@0: /** @file FBXTokenizer.cpp nuclear@0: * @brief Implementation of the FBX broadphase lexer nuclear@0: */ nuclear@0: #include "AssimpPCH.h" nuclear@0: nuclear@0: #ifndef ASSIMP_BUILD_NO_FBX_IMPORTER nuclear@0: nuclear@0: // tab width for logging columns nuclear@0: #define ASSIMP_FBX_TAB_WIDTH 4 nuclear@0: nuclear@0: #include "ParsingUtils.h" nuclear@0: nuclear@0: #include "FBXTokenizer.h" nuclear@0: #include "FBXUtil.h" nuclear@0: nuclear@0: namespace Assimp { nuclear@0: namespace FBX { nuclear@0: nuclear@0: // ------------------------------------------------------------------------------------------------ nuclear@0: Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int line, unsigned int column) nuclear@0: : sbegin(sbegin) nuclear@0: , send(send) nuclear@0: , type(type) nuclear@0: , line(line) nuclear@0: , column(column) nuclear@0: #ifdef DEBUG nuclear@0: , contents(sbegin, static_cast(send-sbegin)) nuclear@0: #endif nuclear@0: { nuclear@0: ai_assert(sbegin); nuclear@0: ai_assert(send); nuclear@0: nuclear@0: // tokens must be of non-zero length nuclear@0: ai_assert(static_cast(send-sbegin) > 0); nuclear@0: } nuclear@0: nuclear@0: nuclear@0: // ------------------------------------------------------------------------------------------------ nuclear@0: Token::~Token() nuclear@0: { nuclear@0: } nuclear@0: nuclear@0: nuclear@0: namespace { nuclear@0: nuclear@0: // ------------------------------------------------------------------------------------------------ nuclear@0: // signal tokenization error, this is always unrecoverable. Throws DeadlyImportError. nuclear@0: void TokenizeError(const std::string& message, unsigned int line, unsigned int column) nuclear@0: { nuclear@0: throw DeadlyImportError(Util::AddLineAndColumn("FBX-Tokenize",message,line,column)); nuclear@0: } nuclear@0: nuclear@0: nuclear@0: // process a potential data token up to 'cur', adding it to 'output_tokens'. nuclear@0: // ------------------------------------------------------------------------------------------------ nuclear@0: void ProcessDataToken( TokenList& output_tokens, const char*& start, const char*& end, nuclear@0: unsigned int line, nuclear@0: unsigned int column, nuclear@0: TokenType type = TokenType_DATA, nuclear@0: bool must_have_token = false) nuclear@0: { nuclear@0: if (start && end) { nuclear@0: // sanity check: nuclear@0: // tokens should have no whitespace outside quoted text and [start,end] should nuclear@0: // properly delimit the valid range. nuclear@0: bool in_double_quotes = false; nuclear@0: for (const char* c = start; c != end + 1; ++c) { nuclear@0: if (*c == '\"') { nuclear@0: in_double_quotes = !in_double_quotes; nuclear@0: } nuclear@0: nuclear@0: if (!in_double_quotes && IsSpaceOrNewLine(*c)) { nuclear@0: TokenizeError("unexpected whitespace in token", line, column); nuclear@0: } nuclear@0: } nuclear@0: nuclear@0: if (in_double_quotes) { nuclear@0: TokenizeError("non-terminated double quotes", line, column); nuclear@0: } nuclear@0: nuclear@0: output_tokens.push_back(new_Token(start,end + 1,type,line,column)); nuclear@0: } nuclear@0: else if (must_have_token) { nuclear@0: TokenizeError("unexpected character, expected data token", line, column); nuclear@0: } nuclear@0: nuclear@0: start = end = NULL; nuclear@0: } nuclear@0: nuclear@0: } nuclear@0: nuclear@0: // ------------------------------------------------------------------------------------------------ nuclear@0: void Tokenize(TokenList& output_tokens, const char* input) nuclear@0: { nuclear@0: ai_assert(input); nuclear@0: nuclear@0: // line and column numbers numbers are one-based nuclear@0: unsigned int line = 1; nuclear@0: unsigned int column = 1; nuclear@0: nuclear@0: bool comment = false; nuclear@0: bool in_double_quotes = false; nuclear@0: bool pending_data_token = false; nuclear@0: nuclear@0: const char* token_begin = NULL, *token_end = NULL; nuclear@0: for (const char* cur = input;*cur;column += (*cur == '\t' ? ASSIMP_FBX_TAB_WIDTH : 1), ++cur) { nuclear@0: const char c = *cur; nuclear@0: nuclear@0: if (IsLineEnd(c)) { nuclear@0: comment = false; nuclear@0: nuclear@0: column = 0; nuclear@0: ++line; nuclear@0: } nuclear@0: nuclear@0: if(comment) { nuclear@0: continue; nuclear@0: } nuclear@0: nuclear@0: if(in_double_quotes) { nuclear@0: if (c == '\"') { nuclear@0: in_double_quotes = false; nuclear@0: token_end = cur; nuclear@0: nuclear@0: ProcessDataToken(output_tokens,token_begin,token_end,line,column); nuclear@0: pending_data_token = false; nuclear@0: } nuclear@0: continue; nuclear@0: } nuclear@0: nuclear@0: switch(c) nuclear@0: { nuclear@0: case '\"': nuclear@0: if (token_begin) { nuclear@0: TokenizeError("unexpected double-quote", line, column); nuclear@0: } nuclear@0: token_begin = cur; nuclear@0: in_double_quotes = true; nuclear@0: continue; nuclear@0: nuclear@0: case ';': nuclear@0: ProcessDataToken(output_tokens,token_begin,token_end,line,column); nuclear@0: comment = true; nuclear@0: continue; nuclear@0: nuclear@0: case '{': nuclear@0: ProcessDataToken(output_tokens,token_begin,token_end, line, column); nuclear@0: output_tokens.push_back(new_Token(cur,cur+1,TokenType_OPEN_BRACKET,line,column)); nuclear@0: continue; nuclear@0: nuclear@0: case '}': nuclear@0: ProcessDataToken(output_tokens,token_begin,token_end,line,column); nuclear@0: output_tokens.push_back(new_Token(cur,cur+1,TokenType_CLOSE_BRACKET,line,column)); nuclear@0: continue; nuclear@0: nuclear@0: case ',': nuclear@0: if (pending_data_token) { nuclear@0: ProcessDataToken(output_tokens,token_begin,token_end,line,column,TokenType_DATA,true); nuclear@0: } nuclear@0: output_tokens.push_back(new_Token(cur,cur+1,TokenType_COMMA,line,column)); nuclear@0: continue; nuclear@0: nuclear@0: case ':': nuclear@0: if (pending_data_token) { nuclear@0: ProcessDataToken(output_tokens,token_begin,token_end,line,column,TokenType_KEY,true); nuclear@0: } nuclear@0: else { nuclear@0: TokenizeError("unexpected colon", line, column); nuclear@0: } nuclear@0: continue; nuclear@0: } nuclear@0: nuclear@0: if (IsSpaceOrNewLine(c)) { nuclear@0: nuclear@0: if (token_begin) { nuclear@0: // peek ahead and check if the next token is a colon in which nuclear@0: // case this counts as KEY token. nuclear@0: TokenType type = TokenType_DATA; nuclear@0: for (const char* peek = cur; *peek && IsSpaceOrNewLine(*peek); ++peek) { nuclear@0: if (*peek == ':') { nuclear@0: type = TokenType_KEY; nuclear@0: cur = peek; nuclear@0: break; nuclear@0: } nuclear@0: } nuclear@0: nuclear@0: ProcessDataToken(output_tokens,token_begin,token_end,line,column,type); nuclear@0: } nuclear@0: nuclear@0: pending_data_token = false; nuclear@0: } nuclear@0: else { nuclear@0: token_end = cur; nuclear@0: if (!token_begin) { nuclear@0: token_begin = cur; nuclear@0: } nuclear@0: nuclear@0: pending_data_token = true; nuclear@0: } nuclear@0: } nuclear@0: } nuclear@0: nuclear@0: } // !FBX nuclear@0: } // !Assimp nuclear@0: nuclear@0: #endif