vrshoot

diff libs/vorbis/mdct.c @ 0:b2f14e535253

initial commit
author John Tsiombikas <nuclear@member.fsf.org>
date Sat, 01 Feb 2014 19:58:19 +0200
parents
children
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/libs/vorbis/mdct.c	Sat Feb 01 19:58:19 2014 +0200
     1.3 @@ -0,0 +1,563 @@
     1.4 +/********************************************************************
     1.5 + *                                                                  *
     1.6 + * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
     1.7 + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
     1.8 + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
     1.9 + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
    1.10 + *                                                                  *
    1.11 + * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009             *
    1.12 + * by the Xiph.Org Foundation http://www.xiph.org/                  *
    1.13 + *                                                                  *
    1.14 + ********************************************************************
    1.15 +
    1.16 + function: normalized modified discrete cosine transform
    1.17 +           power of two length transform only [64 <= n ]
    1.18 + last mod: $Id: mdct.c 16227 2009-07-08 06:58:46Z xiphmont $
    1.19 +
    1.20 + Original algorithm adapted long ago from _The use of multirate filter
    1.21 + banks for coding of high quality digital audio_, by T. Sporer,
    1.22 + K. Brandenburg and B. Edler, collection of the European Signal
    1.23 + Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
    1.24 + 211-214
    1.25 +
    1.26 + The below code implements an algorithm that no longer looks much like
    1.27 + that presented in the paper, but the basic structure remains if you
    1.28 + dig deep enough to see it.
    1.29 +
    1.30 + This module DOES NOT INCLUDE code to generate/apply the window
    1.31 + function.  Everybody has their own weird favorite including me... I
    1.32 + happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
    1.33 + vehemently disagree.
    1.34 +
    1.35 + ********************************************************************/
    1.36 +
    1.37 +/* this can also be run as an integer transform by uncommenting a
    1.38 +   define in mdct.h; the integerization is a first pass and although
    1.39 +   it's likely stable for Vorbis, the dynamic range is constrained and
    1.40 +   roundoff isn't done (so it's noisy).  Consider it functional, but
    1.41 +   only a starting point.  There's no point on a machine with an FPU */
    1.42 +
    1.43 +#include <stdio.h>
    1.44 +#include <stdlib.h>
    1.45 +#include <string.h>
    1.46 +#include <math.h>
    1.47 +#include "vorbis/codec.h"
    1.48 +#include "mdct.h"
    1.49 +#include "os.h"
    1.50 +#include "misc.h"
    1.51 +
    1.52 +/* build lookups for trig functions; also pre-figure scaling and
    1.53 +   some window function algebra. */
    1.54 +
    1.55 +void mdct_init(mdct_lookup *lookup,int n){
    1.56 +  int   *bitrev=_ogg_malloc(sizeof(*bitrev)*(n/4));
    1.57 +  DATA_TYPE *T=_ogg_malloc(sizeof(*T)*(n+n/4));
    1.58 +
    1.59 +  int i;
    1.60 +  int n2=n>>1;
    1.61 +  int log2n=lookup->log2n=rint(log((float)n)/log(2.f));
    1.62 +  lookup->n=n;
    1.63 +  lookup->trig=T;
    1.64 +  lookup->bitrev=bitrev;
    1.65 +
    1.66 +/* trig lookups... */
    1.67 +
    1.68 +  for(i=0;i<n/4;i++){
    1.69 +    T[i*2]=FLOAT_CONV(cos((M_PI/n)*(4*i)));
    1.70 +    T[i*2+1]=FLOAT_CONV(-sin((M_PI/n)*(4*i)));
    1.71 +    T[n2+i*2]=FLOAT_CONV(cos((M_PI/(2*n))*(2*i+1)));
    1.72 +    T[n2+i*2+1]=FLOAT_CONV(sin((M_PI/(2*n))*(2*i+1)));
    1.73 +  }
    1.74 +  for(i=0;i<n/8;i++){
    1.75 +    T[n+i*2]=FLOAT_CONV(cos((M_PI/n)*(4*i+2))*.5);
    1.76 +    T[n+i*2+1]=FLOAT_CONV(-sin((M_PI/n)*(4*i+2))*.5);
    1.77 +  }
    1.78 +
    1.79 +  /* bitreverse lookup... */
    1.80 +
    1.81 +  {
    1.82 +    int mask=(1<<(log2n-1))-1,i,j;
    1.83 +    int msb=1<<(log2n-2);
    1.84 +    for(i=0;i<n/8;i++){
    1.85 +      int acc=0;
    1.86 +      for(j=0;msb>>j;j++)
    1.87 +        if((msb>>j)&i)acc|=1<<j;
    1.88 +      bitrev[i*2]=((~acc)&mask)-1;
    1.89 +      bitrev[i*2+1]=acc;
    1.90 +
    1.91 +    }
    1.92 +  }
    1.93 +  lookup->scale=FLOAT_CONV(4.f/n);
    1.94 +}
    1.95 +
    1.96 +/* 8 point butterfly (in place, 4 register) */
    1.97 +STIN void mdct_butterfly_8(DATA_TYPE *x){
    1.98 +  REG_TYPE r0   = x[6] + x[2];
    1.99 +  REG_TYPE r1   = x[6] - x[2];
   1.100 +  REG_TYPE r2   = x[4] + x[0];
   1.101 +  REG_TYPE r3   = x[4] - x[0];
   1.102 +
   1.103 +           x[6] = r0   + r2;
   1.104 +           x[4] = r0   - r2;
   1.105 +
   1.106 +           r0   = x[5] - x[1];
   1.107 +           r2   = x[7] - x[3];
   1.108 +           x[0] = r1   + r0;
   1.109 +           x[2] = r1   - r0;
   1.110 +
   1.111 +           r0   = x[5] + x[1];
   1.112 +           r1   = x[7] + x[3];
   1.113 +           x[3] = r2   + r3;
   1.114 +           x[1] = r2   - r3;
   1.115 +           x[7] = r1   + r0;
   1.116 +           x[5] = r1   - r0;
   1.117 +
   1.118 +}
   1.119 +
   1.120 +/* 16 point butterfly (in place, 4 register) */
   1.121 +STIN void mdct_butterfly_16(DATA_TYPE *x){
   1.122 +  REG_TYPE r0     = x[1]  - x[9];
   1.123 +  REG_TYPE r1     = x[0]  - x[8];
   1.124 +
   1.125 +           x[8]  += x[0];
   1.126 +           x[9]  += x[1];
   1.127 +           x[0]   = MULT_NORM((r0   + r1) * cPI2_8);
   1.128 +           x[1]   = MULT_NORM((r0   - r1) * cPI2_8);
   1.129 +
   1.130 +           r0     = x[3]  - x[11];
   1.131 +           r1     = x[10] - x[2];
   1.132 +           x[10] += x[2];
   1.133 +           x[11] += x[3];
   1.134 +           x[2]   = r0;
   1.135 +           x[3]   = r1;
   1.136 +
   1.137 +           r0     = x[12] - x[4];
   1.138 +           r1     = x[13] - x[5];
   1.139 +           x[12] += x[4];
   1.140 +           x[13] += x[5];
   1.141 +           x[4]   = MULT_NORM((r0   - r1) * cPI2_8);
   1.142 +           x[5]   = MULT_NORM((r0   + r1) * cPI2_8);
   1.143 +
   1.144 +           r0     = x[14] - x[6];
   1.145 +           r1     = x[15] - x[7];
   1.146 +           x[14] += x[6];
   1.147 +           x[15] += x[7];
   1.148 +           x[6]  = r0;
   1.149 +           x[7]  = r1;
   1.150 +
   1.151 +           mdct_butterfly_8(x);
   1.152 +           mdct_butterfly_8(x+8);
   1.153 +}
   1.154 +
   1.155 +/* 32 point butterfly (in place, 4 register) */
   1.156 +STIN void mdct_butterfly_32(DATA_TYPE *x){
   1.157 +  REG_TYPE r0     = x[30] - x[14];
   1.158 +  REG_TYPE r1     = x[31] - x[15];
   1.159 +
   1.160 +           x[30] +=         x[14];
   1.161 +           x[31] +=         x[15];
   1.162 +           x[14]  =         r0;
   1.163 +           x[15]  =         r1;
   1.164 +
   1.165 +           r0     = x[28] - x[12];
   1.166 +           r1     = x[29] - x[13];
   1.167 +           x[28] +=         x[12];
   1.168 +           x[29] +=         x[13];
   1.169 +           x[12]  = MULT_NORM( r0 * cPI1_8  -  r1 * cPI3_8 );
   1.170 +           x[13]  = MULT_NORM( r0 * cPI3_8  +  r1 * cPI1_8 );
   1.171 +
   1.172 +           r0     = x[26] - x[10];
   1.173 +           r1     = x[27] - x[11];
   1.174 +           x[26] +=         x[10];
   1.175 +           x[27] +=         x[11];
   1.176 +           x[10]  = MULT_NORM(( r0  - r1 ) * cPI2_8);
   1.177 +           x[11]  = MULT_NORM(( r0  + r1 ) * cPI2_8);
   1.178 +
   1.179 +           r0     = x[24] - x[8];
   1.180 +           r1     = x[25] - x[9];
   1.181 +           x[24] += x[8];
   1.182 +           x[25] += x[9];
   1.183 +           x[8]   = MULT_NORM( r0 * cPI3_8  -  r1 * cPI1_8 );
   1.184 +           x[9]   = MULT_NORM( r1 * cPI3_8  +  r0 * cPI1_8 );
   1.185 +
   1.186 +           r0     = x[22] - x[6];
   1.187 +           r1     = x[7]  - x[23];
   1.188 +           x[22] += x[6];
   1.189 +           x[23] += x[7];
   1.190 +           x[6]   = r1;
   1.191 +           x[7]   = r0;
   1.192 +
   1.193 +           r0     = x[4]  - x[20];
   1.194 +           r1     = x[5]  - x[21];
   1.195 +           x[20] += x[4];
   1.196 +           x[21] += x[5];
   1.197 +           x[4]   = MULT_NORM( r1 * cPI1_8  +  r0 * cPI3_8 );
   1.198 +           x[5]   = MULT_NORM( r1 * cPI3_8  -  r0 * cPI1_8 );
   1.199 +
   1.200 +           r0     = x[2]  - x[18];
   1.201 +           r1     = x[3]  - x[19];
   1.202 +           x[18] += x[2];
   1.203 +           x[19] += x[3];
   1.204 +           x[2]   = MULT_NORM(( r1  + r0 ) * cPI2_8);
   1.205 +           x[3]   = MULT_NORM(( r1  - r0 ) * cPI2_8);
   1.206 +
   1.207 +           r0     = x[0]  - x[16];
   1.208 +           r1     = x[1]  - x[17];
   1.209 +           x[16] += x[0];
   1.210 +           x[17] += x[1];
   1.211 +           x[0]   = MULT_NORM( r1 * cPI3_8  +  r0 * cPI1_8 );
   1.212 +           x[1]   = MULT_NORM( r1 * cPI1_8  -  r0 * cPI3_8 );
   1.213 +
   1.214 +           mdct_butterfly_16(x);
   1.215 +           mdct_butterfly_16(x+16);
   1.216 +
   1.217 +}
   1.218 +
   1.219 +/* N point first stage butterfly (in place, 2 register) */
   1.220 +STIN void mdct_butterfly_first(DATA_TYPE *T,
   1.221 +                                        DATA_TYPE *x,
   1.222 +                                        int points){
   1.223 +
   1.224 +  DATA_TYPE *x1        = x          + points      - 8;
   1.225 +  DATA_TYPE *x2        = x          + (points>>1) - 8;
   1.226 +  REG_TYPE   r0;
   1.227 +  REG_TYPE   r1;
   1.228 +
   1.229 +  do{
   1.230 +
   1.231 +               r0      = x1[6]      -  x2[6];
   1.232 +               r1      = x1[7]      -  x2[7];
   1.233 +               x1[6]  += x2[6];
   1.234 +               x1[7]  += x2[7];
   1.235 +               x2[6]   = MULT_NORM(r1 * T[1]  +  r0 * T[0]);
   1.236 +               x2[7]   = MULT_NORM(r1 * T[0]  -  r0 * T[1]);
   1.237 +
   1.238 +               r0      = x1[4]      -  x2[4];
   1.239 +               r1      = x1[5]      -  x2[5];
   1.240 +               x1[4]  += x2[4];
   1.241 +               x1[5]  += x2[5];
   1.242 +               x2[4]   = MULT_NORM(r1 * T[5]  +  r0 * T[4]);
   1.243 +               x2[5]   = MULT_NORM(r1 * T[4]  -  r0 * T[5]);
   1.244 +
   1.245 +               r0      = x1[2]      -  x2[2];
   1.246 +               r1      = x1[3]      -  x2[3];
   1.247 +               x1[2]  += x2[2];
   1.248 +               x1[3]  += x2[3];
   1.249 +               x2[2]   = MULT_NORM(r1 * T[9]  +  r0 * T[8]);
   1.250 +               x2[3]   = MULT_NORM(r1 * T[8]  -  r0 * T[9]);
   1.251 +
   1.252 +               r0      = x1[0]      -  x2[0];
   1.253 +               r1      = x1[1]      -  x2[1];
   1.254 +               x1[0]  += x2[0];
   1.255 +               x1[1]  += x2[1];
   1.256 +               x2[0]   = MULT_NORM(r1 * T[13] +  r0 * T[12]);
   1.257 +               x2[1]   = MULT_NORM(r1 * T[12] -  r0 * T[13]);
   1.258 +
   1.259 +    x1-=8;
   1.260 +    x2-=8;
   1.261 +    T+=16;
   1.262 +
   1.263 +  }while(x2>=x);
   1.264 +}
   1.265 +
   1.266 +/* N/stage point generic N stage butterfly (in place, 2 register) */
   1.267 +STIN void mdct_butterfly_generic(DATA_TYPE *T,
   1.268 +                                          DATA_TYPE *x,
   1.269 +                                          int points,
   1.270 +                                          int trigint){
   1.271 +
   1.272 +  DATA_TYPE *x1        = x          + points      - 8;
   1.273 +  DATA_TYPE *x2        = x          + (points>>1) - 8;
   1.274 +  REG_TYPE   r0;
   1.275 +  REG_TYPE   r1;
   1.276 +
   1.277 +  do{
   1.278 +
   1.279 +               r0      = x1[6]      -  x2[6];
   1.280 +               r1      = x1[7]      -  x2[7];
   1.281 +               x1[6]  += x2[6];
   1.282 +               x1[7]  += x2[7];
   1.283 +               x2[6]   = MULT_NORM(r1 * T[1]  +  r0 * T[0]);
   1.284 +               x2[7]   = MULT_NORM(r1 * T[0]  -  r0 * T[1]);
   1.285 +
   1.286 +               T+=trigint;
   1.287 +
   1.288 +               r0      = x1[4]      -  x2[4];
   1.289 +               r1      = x1[5]      -  x2[5];
   1.290 +               x1[4]  += x2[4];
   1.291 +               x1[5]  += x2[5];
   1.292 +               x2[4]   = MULT_NORM(r1 * T[1]  +  r0 * T[0]);
   1.293 +               x2[5]   = MULT_NORM(r1 * T[0]  -  r0 * T[1]);
   1.294 +
   1.295 +               T+=trigint;
   1.296 +
   1.297 +               r0      = x1[2]      -  x2[2];
   1.298 +               r1      = x1[3]      -  x2[3];
   1.299 +               x1[2]  += x2[2];
   1.300 +               x1[3]  += x2[3];
   1.301 +               x2[2]   = MULT_NORM(r1 * T[1]  +  r0 * T[0]);
   1.302 +               x2[3]   = MULT_NORM(r1 * T[0]  -  r0 * T[1]);
   1.303 +
   1.304 +               T+=trigint;
   1.305 +
   1.306 +               r0      = x1[0]      -  x2[0];
   1.307 +               r1      = x1[1]      -  x2[1];
   1.308 +               x1[0]  += x2[0];
   1.309 +               x1[1]  += x2[1];
   1.310 +               x2[0]   = MULT_NORM(r1 * T[1]  +  r0 * T[0]);
   1.311 +               x2[1]   = MULT_NORM(r1 * T[0]  -  r0 * T[1]);
   1.312 +
   1.313 +               T+=trigint;
   1.314 +    x1-=8;
   1.315 +    x2-=8;
   1.316 +
   1.317 +  }while(x2>=x);
   1.318 +}
   1.319 +
   1.320 +STIN void mdct_butterflies(mdct_lookup *init,
   1.321 +                             DATA_TYPE *x,
   1.322 +                             int points){
   1.323 +
   1.324 +  DATA_TYPE *T=init->trig;
   1.325 +  int stages=init->log2n-5;
   1.326 +  int i,j;
   1.327 +
   1.328 +  if(--stages>0){
   1.329 +    mdct_butterfly_first(T,x,points);
   1.330 +  }
   1.331 +
   1.332 +  for(i=1;--stages>0;i++){
   1.333 +    for(j=0;j<(1<<i);j++)
   1.334 +      mdct_butterfly_generic(T,x+(points>>i)*j,points>>i,4<<i);
   1.335 +  }
   1.336 +
   1.337 +  for(j=0;j<points;j+=32)
   1.338 +    mdct_butterfly_32(x+j);
   1.339 +
   1.340 +}
   1.341 +
   1.342 +void mdct_clear(mdct_lookup *l){
   1.343 +  if(l){
   1.344 +    if(l->trig)_ogg_free(l->trig);
   1.345 +    if(l->bitrev)_ogg_free(l->bitrev);
   1.346 +    memset(l,0,sizeof(*l));
   1.347 +  }
   1.348 +}
   1.349 +
   1.350 +STIN void mdct_bitreverse(mdct_lookup *init,
   1.351 +                            DATA_TYPE *x){
   1.352 +  int        n       = init->n;
   1.353 +  int       *bit     = init->bitrev;
   1.354 +  DATA_TYPE *w0      = x;
   1.355 +  DATA_TYPE *w1      = x = w0+(n>>1);
   1.356 +  DATA_TYPE *T       = init->trig+n;
   1.357 +
   1.358 +  do{
   1.359 +    DATA_TYPE *x0    = x+bit[0];
   1.360 +    DATA_TYPE *x1    = x+bit[1];
   1.361 +
   1.362 +    REG_TYPE  r0     = x0[1]  - x1[1];
   1.363 +    REG_TYPE  r1     = x0[0]  + x1[0];
   1.364 +    REG_TYPE  r2     = MULT_NORM(r1     * T[0]   + r0 * T[1]);
   1.365 +    REG_TYPE  r3     = MULT_NORM(r1     * T[1]   - r0 * T[0]);
   1.366 +
   1.367 +              w1    -= 4;
   1.368 +
   1.369 +              r0     = HALVE(x0[1] + x1[1]);
   1.370 +              r1     = HALVE(x0[0] - x1[0]);
   1.371 +
   1.372 +              w0[0]  = r0     + r2;
   1.373 +              w1[2]  = r0     - r2;
   1.374 +              w0[1]  = r1     + r3;
   1.375 +              w1[3]  = r3     - r1;
   1.376 +
   1.377 +              x0     = x+bit[2];
   1.378 +              x1     = x+bit[3];
   1.379 +
   1.380 +              r0     = x0[1]  - x1[1];
   1.381 +              r1     = x0[0]  + x1[0];
   1.382 +              r2     = MULT_NORM(r1     * T[2]   + r0 * T[3]);
   1.383 +              r3     = MULT_NORM(r1     * T[3]   - r0 * T[2]);
   1.384 +
   1.385 +              r0     = HALVE(x0[1] + x1[1]);
   1.386 +              r1     = HALVE(x0[0] - x1[0]);
   1.387 +
   1.388 +              w0[2]  = r0     + r2;
   1.389 +              w1[0]  = r0     - r2;
   1.390 +              w0[3]  = r1     + r3;
   1.391 +              w1[1]  = r3     - r1;
   1.392 +
   1.393 +              T     += 4;
   1.394 +              bit   += 4;
   1.395 +              w0    += 4;
   1.396 +
   1.397 +  }while(w0<w1);
   1.398 +}
   1.399 +
   1.400 +void mdct_backward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out){
   1.401 +  int n=init->n;
   1.402 +  int n2=n>>1;
   1.403 +  int n4=n>>2;
   1.404 +
   1.405 +  /* rotate */
   1.406 +
   1.407 +  DATA_TYPE *iX = in+n2-7;
   1.408 +  DATA_TYPE *oX = out+n2+n4;
   1.409 +  DATA_TYPE *T  = init->trig+n4;
   1.410 +
   1.411 +  do{
   1.412 +    oX         -= 4;
   1.413 +    oX[0]       = MULT_NORM(-iX[2] * T[3] - iX[0]  * T[2]);
   1.414 +    oX[1]       = MULT_NORM (iX[0] * T[3] - iX[2]  * T[2]);
   1.415 +    oX[2]       = MULT_NORM(-iX[6] * T[1] - iX[4]  * T[0]);
   1.416 +    oX[3]       = MULT_NORM (iX[4] * T[1] - iX[6]  * T[0]);
   1.417 +    iX         -= 8;
   1.418 +    T          += 4;
   1.419 +  }while(iX>=in);
   1.420 +
   1.421 +  iX            = in+n2-8;
   1.422 +  oX            = out+n2+n4;
   1.423 +  T             = init->trig+n4;
   1.424 +
   1.425 +  do{
   1.426 +    T          -= 4;
   1.427 +    oX[0]       =  MULT_NORM (iX[4] * T[3] + iX[6] * T[2]);
   1.428 +    oX[1]       =  MULT_NORM (iX[4] * T[2] - iX[6] * T[3]);
   1.429 +    oX[2]       =  MULT_NORM (iX[0] * T[1] + iX[2] * T[0]);
   1.430 +    oX[3]       =  MULT_NORM (iX[0] * T[0] - iX[2] * T[1]);
   1.431 +    iX         -= 8;
   1.432 +    oX         += 4;
   1.433 +  }while(iX>=in);
   1.434 +
   1.435 +  mdct_butterflies(init,out+n2,n2);
   1.436 +  mdct_bitreverse(init,out);
   1.437 +
   1.438 +  /* roatate + window */
   1.439 +
   1.440 +  {
   1.441 +    DATA_TYPE *oX1=out+n2+n4;
   1.442 +    DATA_TYPE *oX2=out+n2+n4;
   1.443 +    DATA_TYPE *iX =out;
   1.444 +    T             =init->trig+n2;
   1.445 +
   1.446 +    do{
   1.447 +      oX1-=4;
   1.448 +
   1.449 +      oX1[3]  =  MULT_NORM (iX[0] * T[1] - iX[1] * T[0]);
   1.450 +      oX2[0]  = -MULT_NORM (iX[0] * T[0] + iX[1] * T[1]);
   1.451 +
   1.452 +      oX1[2]  =  MULT_NORM (iX[2] * T[3] - iX[3] * T[2]);
   1.453 +      oX2[1]  = -MULT_NORM (iX[2] * T[2] + iX[3] * T[3]);
   1.454 +
   1.455 +      oX1[1]  =  MULT_NORM (iX[4] * T[5] - iX[5] * T[4]);
   1.456 +      oX2[2]  = -MULT_NORM (iX[4] * T[4] + iX[5] * T[5]);
   1.457 +
   1.458 +      oX1[0]  =  MULT_NORM (iX[6] * T[7] - iX[7] * T[6]);
   1.459 +      oX2[3]  = -MULT_NORM (iX[6] * T[6] + iX[7] * T[7]);
   1.460 +
   1.461 +      oX2+=4;
   1.462 +      iX    +=   8;
   1.463 +      T     +=   8;
   1.464 +    }while(iX<oX1);
   1.465 +
   1.466 +    iX=out+n2+n4;
   1.467 +    oX1=out+n4;
   1.468 +    oX2=oX1;
   1.469 +
   1.470 +    do{
   1.471 +      oX1-=4;
   1.472 +      iX-=4;
   1.473 +
   1.474 +      oX2[0] = -(oX1[3] = iX[3]);
   1.475 +      oX2[1] = -(oX1[2] = iX[2]);
   1.476 +      oX2[2] = -(oX1[1] = iX[1]);
   1.477 +      oX2[3] = -(oX1[0] = iX[0]);
   1.478 +
   1.479 +      oX2+=4;
   1.480 +    }while(oX2<iX);
   1.481 +
   1.482 +    iX=out+n2+n4;
   1.483 +    oX1=out+n2+n4;
   1.484 +    oX2=out+n2;
   1.485 +    do{
   1.486 +      oX1-=4;
   1.487 +      oX1[0]= iX[3];
   1.488 +      oX1[1]= iX[2];
   1.489 +      oX1[2]= iX[1];
   1.490 +      oX1[3]= iX[0];
   1.491 +      iX+=4;
   1.492 +    }while(oX1>oX2);
   1.493 +  }
   1.494 +}
   1.495 +
   1.496 +void mdct_forward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out){
   1.497 +  int n=init->n;
   1.498 +  int n2=n>>1;
   1.499 +  int n4=n>>2;
   1.500 +  int n8=n>>3;
   1.501 +  DATA_TYPE *w=alloca(n*sizeof(*w)); /* forward needs working space */
   1.502 +  DATA_TYPE *w2=w+n2;
   1.503 +
   1.504 +  /* rotate */
   1.505 +
   1.506 +  /* window + rotate + step 1 */
   1.507 +
   1.508 +  REG_TYPE r0;
   1.509 +  REG_TYPE r1;
   1.510 +  DATA_TYPE *x0=in+n2+n4;
   1.511 +  DATA_TYPE *x1=x0+1;
   1.512 +  DATA_TYPE *T=init->trig+n2;
   1.513 +
   1.514 +  int i=0;
   1.515 +
   1.516 +  for(i=0;i<n8;i+=2){
   1.517 +    x0 -=4;
   1.518 +    T-=2;
   1.519 +    r0= x0[2] + x1[0];
   1.520 +    r1= x0[0] + x1[2];
   1.521 +    w2[i]=   MULT_NORM(r1*T[1] + r0*T[0]);
   1.522 +    w2[i+1]= MULT_NORM(r1*T[0] - r0*T[1]);
   1.523 +    x1 +=4;
   1.524 +  }
   1.525 +
   1.526 +  x1=in+1;
   1.527 +
   1.528 +  for(;i<n2-n8;i+=2){
   1.529 +    T-=2;
   1.530 +    x0 -=4;
   1.531 +    r0= x0[2] - x1[0];
   1.532 +    r1= x0[0] - x1[2];
   1.533 +    w2[i]=   MULT_NORM(r1*T[1] + r0*T[0]);
   1.534 +    w2[i+1]= MULT_NORM(r1*T[0] - r0*T[1]);
   1.535 +    x1 +=4;
   1.536 +  }
   1.537 +
   1.538 +  x0=in+n;
   1.539 +
   1.540 +  for(;i<n2;i+=2){
   1.541 +    T-=2;
   1.542 +    x0 -=4;
   1.543 +    r0= -x0[2] - x1[0];
   1.544 +    r1= -x0[0] - x1[2];
   1.545 +    w2[i]=   MULT_NORM(r1*T[1] + r0*T[0]);
   1.546 +    w2[i+1]= MULT_NORM(r1*T[0] - r0*T[1]);
   1.547 +    x1 +=4;
   1.548 +  }
   1.549 +
   1.550 +
   1.551 +  mdct_butterflies(init,w+n2,n2);
   1.552 +  mdct_bitreverse(init,w);
   1.553 +
   1.554 +  /* roatate + window */
   1.555 +
   1.556 +  T=init->trig+n2;
   1.557 +  x0=out+n2;
   1.558 +
   1.559 +  for(i=0;i<n4;i++){
   1.560 +    x0--;
   1.561 +    out[i] =MULT_NORM((w[0]*T[0]+w[1]*T[1])*init->scale);
   1.562 +    x0[0]  =MULT_NORM((w[0]*T[1]-w[1]*T[0])*init->scale);
   1.563 +    w+=2;
   1.564 +    T+=2;
   1.565 +  }
   1.566 +}