vrshoot

view libs/assimp/FBXBinaryTokenizer.cpp @ 0:b2f14e535253

initial commit
author John Tsiombikas <nuclear@member.fsf.org>
date Sat, 01 Feb 2014 19:58:19 +0200
/*
Open Asset Import Library (assimp)
----------------------------------------------------------------------

Copyright (c) 2006-2012, assimp team
All rights reserved.

Redistribution and use of this software in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:

* Redistributions of source code must retain the above
  copyright notice, this list of conditions and the
  following disclaimer.

* Redistributions in binary form must reproduce the above
  copyright notice, this list of conditions and the
  following disclaimer in the documentation and/or other
  materials provided with the distribution.

* Neither the name of the assimp team, nor the names of its
  contributors may be used to endorse or promote products
  derived from this software without specific prior
  written permission of the assimp team.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

----------------------------------------------------------------------
*/
/** @file  FBXBinaryTokenizer.cpp
 *  @brief Implementation of a fake lexer for binary fbx files -
 *    we emit tokens so the parser needs almost no special handling
 *    for binary files.
 */
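// ------------------------------------------------------------------------------------------------
// Layout of a binary FBX file as handled by the code below (a summary of what
// this tokenizer reads, not an official format specification):
//
//   header:  0x1b (27) bytes, beginning with the magic string
//            "Kaydara FBX Binary"
//   records: a sequence of nested records, each laid out as
//            uint32 end_offset    absolute offset at which the record ends
//            uint32 prop_count    number of properties in the record
//            uint32 prop_length   byte length of the property list
//            uint8  name_len      followed by <name_len> name bytes
//            <prop_count> typed properties (see ReadData below)
//            optional nested records, closed by a 13-byte all-zero sentinel
//
// A record whose end_offset is zero marks the end of the useful data; the
// footer that follows it is not interpreted.
// ------------------------------------------------------------------------------------------------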
#include "AssimpPCH.h"

#ifndef ASSIMP_BUILD_NO_FBX_IMPORTER

#include "FBXTokenizer.h"
#include "FBXUtil.h"

namespace Assimp {
namespace FBX {
// ------------------------------------------------------------------------------------------------
Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int offset)
	: sbegin(sbegin)
	, send(send)
	, type(type)
	, line(offset)
	, column(BINARY_MARKER)
#ifdef DEBUG
	, contents(sbegin, static_cast<size_t>(send-sbegin))
#endif
{
	ai_assert(sbegin);
	ai_assert(send);

	// binary tokens may have zero length because they are sometimes dummies
	// inserted by TokenizeBinary()
	ai_assert(send >= sbegin);
}
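// Note: for tokens produced by the binary tokenizer, 'line' actually stores
// the byte offset of the token within the file and 'column' is set to
// BINARY_MARKER, so diagnostics can still point at a position in the stream.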
namespace {

// ------------------------------------------------------------------------------------------------
// signal tokenization error, this is always unrecoverable. Throws DeadlyImportError.
void TokenizeError(const std::string& message, unsigned int offset)
{
	throw DeadlyImportError(Util::AddOffset("FBX-Tokenize",message,offset));
}


// ------------------------------------------------------------------------------------------------
uint32_t Offset(const char* begin, const char* cursor)
{
	ai_assert(begin <= cursor);
	return static_cast<unsigned int>(cursor - begin);
}


// ------------------------------------------------------------------------------------------------
void TokenizeError(const std::string& message, const char* begin, const char* cursor)
{
	TokenizeError(message, Offset(begin, cursor));
}
// ------------------------------------------------------------------------------------------------
uint32_t ReadWord(const char* input, const char*& cursor, const char* end)
{
	if(Offset(cursor, end) < 4) {
		TokenizeError("cannot ReadWord, out of bounds",input, cursor);
	}

	uint32_t word = *reinterpret_cast<const uint32_t*>(cursor);
	AI_SWAP4(word);

	cursor += 4;

	return word;
}
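// ReadWord reads a 4-byte little-endian word from the stream; AI_SWAP4
// converts it to host byte order (it performs an actual swap only on
// big-endian builds and is a no-op otherwise).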
// ------------------------------------------------------------------------------------------------
uint8_t ReadByte(const char* input, const char*& cursor, const char* end)
{
	if(Offset(cursor, end) < 1) {
		TokenizeError("cannot ReadByte, out of bounds",input, cursor);
	}

	uint8_t word = *reinterpret_cast<const uint8_t*>(cursor);
	++cursor;

	return word;
}
// ------------------------------------------------------------------------------------------------
unsigned int ReadString(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end,
	bool long_length = false,
	bool allow_null = false)
{
	const uint32_t len_len = long_length ? 4 : 1;
	if(Offset(cursor, end) < len_len) {
		TokenizeError("cannot ReadString, out of bounds reading length",input, cursor);
	}

	const uint32_t length = long_length ? ReadWord(input, cursor, end) : ReadByte(input, cursor, end);

	if (Offset(cursor, end) < length) {
		TokenizeError("cannot ReadString, length is out of bounds",input, cursor);
	}

	sbegin_out = cursor;
	cursor += length;

	send_out = cursor;

	if(!allow_null) {
		for (unsigned int i = 0; i < length; ++i) {
			if(sbegin_out[i] == '\0') {
				TokenizeError("failed ReadString, unexpected NUL character in string",input, cursor);
			}
		}
	}

	return length;
}
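// Strings in the binary format are length-prefixed and not NUL-terminated:
// record names use a one-byte length prefix (long_length == false), while 'S'
// string properties use a four-byte one. Embedded NUL characters are only
// accepted when allow_null is set, as done for 'S' properties in ReadData.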
// ------------------------------------------------------------------------------------------------
void ReadData(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end)
{
	if(Offset(cursor, end) < 1) {
		TokenizeError("cannot ReadData, out of bounds reading length",input, cursor);
	}

	const char type = *cursor;
	sbegin_out = cursor++;

	switch(type)
	{
		// 16 bit int
	case 'Y':
		cursor += 2;
		break;

		// 1 bit bool flag (yes/no)
	case 'C':
		cursor += 1;
		break;

		// 32 bit int
	case 'I':
		// <- fall thru

		// float
	case 'F':
		cursor += 4;
		break;

		// double
	case 'D':
		cursor += 8;
		break;

		// 64 bit int
	case 'L':
		cursor += 8;
		break;

		// note: do not write cursor += ReadWord(...cursor) as this would be UB

		// raw binary data
	case 'R':
	{
		const uint32_t length = ReadWord(input, cursor, end);
		cursor += length;
		break;
	}

	case 'b':
		// TODO: what is the 'b' type code? Right now we just skip over it /
		// take the full range we could get
		cursor = end;
		break;

		// array of *
	case 'f':
	case 'd':
	case 'l':
	case 'i': {

		const uint32_t length = ReadWord(input, cursor, end);
		const uint32_t encoding = ReadWord(input, cursor, end);

		const uint32_t comp_len = ReadWord(input, cursor, end);

		// compute length based on type and check against the stored value
		if(encoding == 0) {
			uint32_t stride;
			switch(type)
			{
			case 'f':
			case 'i':
				stride = 4;
				break;

			case 'd':
			case 'l':
				stride = 8;
				break;

			default:
				ai_assert(false);
			};
			if(length * stride != comp_len) {
				TokenizeError("cannot ReadData, calculated data stride differs from what the file claims",input, cursor);
			}
		}
		// zip/deflate algorithm (encoding==1)? take given length. anything else? die
		else if (encoding != 1) {
			TokenizeError("cannot ReadData, unknown encoding",input, cursor);
		}
		cursor += comp_len;
		break;
	}

		// string
	case 'S': {
		const char* sb, *se;
		// 0 characters can legally happen in such strings
		ReadString(sb, se, input, cursor, end, true, true);
		break;
	}
	default:
		TokenizeError("cannot ReadData, unexpected type code: " + std::string(&type, 1),input, cursor);
	}

	if(cursor > end) {
		TokenizeError("cannot ReadData, the remaining size is too small for the data type: " + std::string(&type, 1),input, cursor);
	}

	// the type code is contained in the returned range
	send_out = cursor;
}
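// Summary of the property type codes handled above (sizes are what ReadData
// skips; summarized from this switch, not from an external specification):
//   'Y' int16  (2 bytes)        'C' bool flag (1 byte)
//   'I' int32  (4 bytes)        'F' float     (4 bytes)
//   'D' double (8 bytes)        'L' int64     (8 bytes)
//   'R' raw data:  uint32 length, then <length> bytes
//   'S' string:    uint32 length, then <length> bytes (NULs allowed)
//   'f','d','l','i' arrays: uint32 count, uint32 encoding, uint32 comp_len,
//       then <comp_len> bytes (raw when encoding == 0, zip/deflate when 1)
//   'b' unknown; the remainder of the property list is skipped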
// ------------------------------------------------------------------------------------------------
bool ReadScope(TokenList& output_tokens, const char* input, const char*& cursor, const char* end)
{
	// the first word contains the offset at which this block ends
	const uint32_t end_offset = ReadWord(input, cursor, end);

	// we may get 0 if reading reached the end of the file -
	// fbx files have a mysterious extra footer which I don't know
	// how to extract any information from, but at least it always
	// starts with a 0.
	if(!end_offset) {
		return false;
	}

	if(end_offset > Offset(input, end)) {
		TokenizeError("block offset is out of range",input, cursor);
	}
	else if(end_offset < Offset(input, cursor)) {
		TokenizeError("block offset is negative out of range",input, cursor);
	}

	// the second data word contains the number of properties in the scope
	const uint32_t prop_count = ReadWord(input, cursor, end);

	// the third data word contains the length of the property list
	const uint32_t prop_length = ReadWord(input, cursor, end);

	// now comes the name of the scope/key
	const char* sbeg, *send;
	ReadString(sbeg, send, input, cursor, end);

	output_tokens.push_back(new_Token(sbeg, send, TokenType_KEY, Offset(input, cursor) ));

	// now come the individual properties
	const char* begin_cursor = cursor;
	for (unsigned int i = 0; i < prop_count; ++i) {
		ReadData(sbeg, send, input, cursor, begin_cursor + prop_length);

		output_tokens.push_back(new_Token(sbeg, send, TokenType_DATA, Offset(input, cursor) ));

		if(i != prop_count-1) {
			output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_COMMA, Offset(input, cursor) ));
		}
	}

	if (Offset(begin_cursor, cursor) != prop_length) {
		TokenizeError("property length not reached, something is wrong",input, cursor);
	}

	// at the end of each nested block, there is a NUL record to indicate
	// that the sub-scope exists (i.e. to distinguish between P: and P : {})
	// this NUL record is 13 bytes long.
#define BLOCK_SENTINEL_LENGTH 13

	if (Offset(input, cursor) < end_offset) {

		if (end_offset - Offset(input, cursor) < BLOCK_SENTINEL_LENGTH) {
			TokenizeError("insufficient padding bytes at block end",input, cursor);
		}

		output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_OPEN_BRACKET, Offset(input, cursor) ));

		// XXX this is vulnerable to stack overflowing ..
		while(Offset(input, cursor) < end_offset - BLOCK_SENTINEL_LENGTH) {
			ReadScope(output_tokens, input, cursor, input + end_offset - BLOCK_SENTINEL_LENGTH);
		}
		output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_CLOSE_BRACKET, Offset(input, cursor) ));

		for (unsigned int i = 0; i < BLOCK_SENTINEL_LENGTH; ++i) {
			if(cursor[i] != '\0') {
				TokenizeError("failed to read nested block sentinel, expected all bytes to be 0",input, cursor);
			}
		}
		cursor += BLOCK_SENTINEL_LENGTH;
	}

	if (Offset(input, cursor) != end_offset) {
		TokenizeError("scope length not reached, something is wrong",input, cursor);
	}

	return true;
}

}
// ------------------------------------------------------------------------------------------------
void TokenizeBinary(TokenList& output_tokens, const char* input, unsigned int length)
{
	ai_assert(input);

	if(length < 0x1b) {
		TokenizeError("file is too short",0);
	}

	if (strncmp(input,"Kaydara FBX Binary",18)) {
		TokenizeError("magic bytes not found",0);
	}

	//uint32_t offset = 0x1b;

	const char* cursor = input + 0x1b;

	while (cursor < input + length) {
		if(!ReadScope(output_tokens, input, cursor, input + length)) {
			break;
		}
	}
}
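// A minimal usage sketch (hypothetical caller code, for illustration only;
// within assimp the FBX importer is the actual caller). It assumes the whole
// file has already been read into a contiguous buffer:
//
//   std::vector<char> contents;              // filled with the .fbx file bytes
//   TokenList tokens;
//   TokenizeBinary(tokens, &contents[0],
//                  static_cast<unsigned int>(contents.size()));
//   // 'tokens' now holds the KEY/DATA/COMMA/bracket stream that the FBX
//   // parser consumes, just as it would for the text format tokenizer.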
} // !FBX
} // !Assimp

#endif