vrshoot
view libs/vorbis/mdct.c @ 1:e7ca128b8713
looks nice :)
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Sun, 02 Feb 2014 00:35:22 +0200 |
parents | |
children |
line source
1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
7 * *
8 * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 *
9 * by the Xiph.Org Foundation http://www.xiph.org/ *
10 * *
11 ********************************************************************
13 function: normalized modified discrete cosine transform
14 power of two length transform only [64 <= n ]
15 last mod: $Id: mdct.c 16227 2009-07-08 06:58:46Z xiphmont $
17 Original algorithm adapted long ago from _The use of multirate filter
18 banks for coding of high quality digital audio_, by T. Sporer,
19 K. Brandenburg and B. Edler, collection of the European Signal
20 Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
21 211-214
23 The below code implements an algorithm that no longer looks much like
24 that presented in the paper, but the basic structure remains if you
25 dig deep enough to see it.
27 This module DOES NOT INCLUDE code to generate/apply the window
28 function. Everybody has their own weird favorite including me... I
29 happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
30 vehemently disagree.
32 ********************************************************************/
34 /* this can also be run as an integer transform by uncommenting a
35 define in mdct.h; the integerization is a first pass and although
36 it's likely stable for Vorbis, the dynamic range is constrained and
37 roundoff isn't done (so it's noisy). Consider it functional, but
38 only a starting point. There's no point on a machine with an FPU */
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <math.h>
44 #include "vorbis/codec.h"
45 #include "mdct.h"
46 #include "os.h"
47 #include "misc.h"
49 /* build lookups for trig functions; also pre-figure scaling and
50 some window function algebra. */
52 void mdct_init(mdct_lookup *lookup,int n){
53 int *bitrev=_ogg_malloc(sizeof(*bitrev)*(n/4));
54 DATA_TYPE *T=_ogg_malloc(sizeof(*T)*(n+n/4));
56 int i;
57 int n2=n>>1;
58 int log2n=lookup->log2n=rint(log((float)n)/log(2.f));
59 lookup->n=n;
60 lookup->trig=T;
61 lookup->bitrev=bitrev;
63 /* trig lookups... */
65 for(i=0;i<n/4;i++){
66 T[i*2]=FLOAT_CONV(cos((M_PI/n)*(4*i)));
67 T[i*2+1]=FLOAT_CONV(-sin((M_PI/n)*(4*i)));
68 T[n2+i*2]=FLOAT_CONV(cos((M_PI/(2*n))*(2*i+1)));
69 T[n2+i*2+1]=FLOAT_CONV(sin((M_PI/(2*n))*(2*i+1)));
70 }
71 for(i=0;i<n/8;i++){
72 T[n+i*2]=FLOAT_CONV(cos((M_PI/n)*(4*i+2))*.5);
73 T[n+i*2+1]=FLOAT_CONV(-sin((M_PI/n)*(4*i+2))*.5);
74 }
76 /* bitreverse lookup... */
78 {
79 int mask=(1<<(log2n-1))-1,i,j;
80 int msb=1<<(log2n-2);
81 for(i=0;i<n/8;i++){
82 int acc=0;
83 for(j=0;msb>>j;j++)
84 if((msb>>j)&i)acc|=1<<j;
85 bitrev[i*2]=((~acc)&mask)-1;
86 bitrev[i*2+1]=acc;
88 }
89 }
90 lookup->scale=FLOAT_CONV(4.f/n);
91 }
93 /* 8 point butterfly (in place, 4 register) */
94 STIN void mdct_butterfly_8(DATA_TYPE *x){
95 REG_TYPE r0 = x[6] + x[2];
96 REG_TYPE r1 = x[6] - x[2];
97 REG_TYPE r2 = x[4] + x[0];
98 REG_TYPE r3 = x[4] - x[0];
100 x[6] = r0 + r2;
101 x[4] = r0 - r2;
103 r0 = x[5] - x[1];
104 r2 = x[7] - x[3];
105 x[0] = r1 + r0;
106 x[2] = r1 - r0;
108 r0 = x[5] + x[1];
109 r1 = x[7] + x[3];
110 x[3] = r2 + r3;
111 x[1] = r2 - r3;
112 x[7] = r1 + r0;
113 x[5] = r1 - r0;
115 }
117 /* 16 point butterfly (in place, 4 register) */
118 STIN void mdct_butterfly_16(DATA_TYPE *x){
119 REG_TYPE r0 = x[1] - x[9];
120 REG_TYPE r1 = x[0] - x[8];
122 x[8] += x[0];
123 x[9] += x[1];
124 x[0] = MULT_NORM((r0 + r1) * cPI2_8);
125 x[1] = MULT_NORM((r0 - r1) * cPI2_8);
127 r0 = x[3] - x[11];
128 r1 = x[10] - x[2];
129 x[10] += x[2];
130 x[11] += x[3];
131 x[2] = r0;
132 x[3] = r1;
134 r0 = x[12] - x[4];
135 r1 = x[13] - x[5];
136 x[12] += x[4];
137 x[13] += x[5];
138 x[4] = MULT_NORM((r0 - r1) * cPI2_8);
139 x[5] = MULT_NORM((r0 + r1) * cPI2_8);
141 r0 = x[14] - x[6];
142 r1 = x[15] - x[7];
143 x[14] += x[6];
144 x[15] += x[7];
145 x[6] = r0;
146 x[7] = r1;
148 mdct_butterfly_8(x);
149 mdct_butterfly_8(x+8);
150 }
152 /* 32 point butterfly (in place, 4 register) */
153 STIN void mdct_butterfly_32(DATA_TYPE *x){
154 REG_TYPE r0 = x[30] - x[14];
155 REG_TYPE r1 = x[31] - x[15];
157 x[30] += x[14];
158 x[31] += x[15];
159 x[14] = r0;
160 x[15] = r1;
162 r0 = x[28] - x[12];
163 r1 = x[29] - x[13];
164 x[28] += x[12];
165 x[29] += x[13];
166 x[12] = MULT_NORM( r0 * cPI1_8 - r1 * cPI3_8 );
167 x[13] = MULT_NORM( r0 * cPI3_8 + r1 * cPI1_8 );
169 r0 = x[26] - x[10];
170 r1 = x[27] - x[11];
171 x[26] += x[10];
172 x[27] += x[11];
173 x[10] = MULT_NORM(( r0 - r1 ) * cPI2_8);
174 x[11] = MULT_NORM(( r0 + r1 ) * cPI2_8);
176 r0 = x[24] - x[8];
177 r1 = x[25] - x[9];
178 x[24] += x[8];
179 x[25] += x[9];
180 x[8] = MULT_NORM( r0 * cPI3_8 - r1 * cPI1_8 );
181 x[9] = MULT_NORM( r1 * cPI3_8 + r0 * cPI1_8 );
183 r0 = x[22] - x[6];
184 r1 = x[7] - x[23];
185 x[22] += x[6];
186 x[23] += x[7];
187 x[6] = r1;
188 x[7] = r0;
190 r0 = x[4] - x[20];
191 r1 = x[5] - x[21];
192 x[20] += x[4];
193 x[21] += x[5];
194 x[4] = MULT_NORM( r1 * cPI1_8 + r0 * cPI3_8 );
195 x[5] = MULT_NORM( r1 * cPI3_8 - r0 * cPI1_8 );
197 r0 = x[2] - x[18];
198 r1 = x[3] - x[19];
199 x[18] += x[2];
200 x[19] += x[3];
201 x[2] = MULT_NORM(( r1 + r0 ) * cPI2_8);
202 x[3] = MULT_NORM(( r1 - r0 ) * cPI2_8);
204 r0 = x[0] - x[16];
205 r1 = x[1] - x[17];
206 x[16] += x[0];
207 x[17] += x[1];
208 x[0] = MULT_NORM( r1 * cPI3_8 + r0 * cPI1_8 );
209 x[1] = MULT_NORM( r1 * cPI1_8 - r0 * cPI3_8 );
211 mdct_butterfly_16(x);
212 mdct_butterfly_16(x+16);
214 }
216 /* N point first stage butterfly (in place, 2 register) */
217 STIN void mdct_butterfly_first(DATA_TYPE *T,
218 DATA_TYPE *x,
219 int points){
221 DATA_TYPE *x1 = x + points - 8;
222 DATA_TYPE *x2 = x + (points>>1) - 8;
223 REG_TYPE r0;
224 REG_TYPE r1;
226 do{
228 r0 = x1[6] - x2[6];
229 r1 = x1[7] - x2[7];
230 x1[6] += x2[6];
231 x1[7] += x2[7];
232 x2[6] = MULT_NORM(r1 * T[1] + r0 * T[0]);
233 x2[7] = MULT_NORM(r1 * T[0] - r0 * T[1]);
235 r0 = x1[4] - x2[4];
236 r1 = x1[5] - x2[5];
237 x1[4] += x2[4];
238 x1[5] += x2[5];
239 x2[4] = MULT_NORM(r1 * T[5] + r0 * T[4]);
240 x2[5] = MULT_NORM(r1 * T[4] - r0 * T[5]);
242 r0 = x1[2] - x2[2];
243 r1 = x1[3] - x2[3];
244 x1[2] += x2[2];
245 x1[3] += x2[3];
246 x2[2] = MULT_NORM(r1 * T[9] + r0 * T[8]);
247 x2[3] = MULT_NORM(r1 * T[8] - r0 * T[9]);
249 r0 = x1[0] - x2[0];
250 r1 = x1[1] - x2[1];
251 x1[0] += x2[0];
252 x1[1] += x2[1];
253 x2[0] = MULT_NORM(r1 * T[13] + r0 * T[12]);
254 x2[1] = MULT_NORM(r1 * T[12] - r0 * T[13]);
256 x1-=8;
257 x2-=8;
258 T+=16;
260 }while(x2>=x);
261 }
263 /* N/stage point generic N stage butterfly (in place, 2 register) */
264 STIN void mdct_butterfly_generic(DATA_TYPE *T,
265 DATA_TYPE *x,
266 int points,
267 int trigint){
269 DATA_TYPE *x1 = x + points - 8;
270 DATA_TYPE *x2 = x + (points>>1) - 8;
271 REG_TYPE r0;
272 REG_TYPE r1;
274 do{
276 r0 = x1[6] - x2[6];
277 r1 = x1[7] - x2[7];
278 x1[6] += x2[6];
279 x1[7] += x2[7];
280 x2[6] = MULT_NORM(r1 * T[1] + r0 * T[0]);
281 x2[7] = MULT_NORM(r1 * T[0] - r0 * T[1]);
283 T+=trigint;
285 r0 = x1[4] - x2[4];
286 r1 = x1[5] - x2[5];
287 x1[4] += x2[4];
288 x1[5] += x2[5];
289 x2[4] = MULT_NORM(r1 * T[1] + r0 * T[0]);
290 x2[5] = MULT_NORM(r1 * T[0] - r0 * T[1]);
292 T+=trigint;
294 r0 = x1[2] - x2[2];
295 r1 = x1[3] - x2[3];
296 x1[2] += x2[2];
297 x1[3] += x2[3];
298 x2[2] = MULT_NORM(r1 * T[1] + r0 * T[0]);
299 x2[3] = MULT_NORM(r1 * T[0] - r0 * T[1]);
301 T+=trigint;
303 r0 = x1[0] - x2[0];
304 r1 = x1[1] - x2[1];
305 x1[0] += x2[0];
306 x1[1] += x2[1];
307 x2[0] = MULT_NORM(r1 * T[1] + r0 * T[0]);
308 x2[1] = MULT_NORM(r1 * T[0] - r0 * T[1]);
310 T+=trigint;
311 x1-=8;
312 x2-=8;
314 }while(x2>=x);
315 }
317 STIN void mdct_butterflies(mdct_lookup *init,
318 DATA_TYPE *x,
319 int points){
321 DATA_TYPE *T=init->trig;
322 int stages=init->log2n-5;
323 int i,j;
325 if(--stages>0){
326 mdct_butterfly_first(T,x,points);
327 }
329 for(i=1;--stages>0;i++){
330 for(j=0;j<(1<<i);j++)
331 mdct_butterfly_generic(T,x+(points>>i)*j,points>>i,4<<i);
332 }
334 for(j=0;j<points;j+=32)
335 mdct_butterfly_32(x+j);
337 }
339 void mdct_clear(mdct_lookup *l){
340 if(l){
341 if(l->trig)_ogg_free(l->trig);
342 if(l->bitrev)_ogg_free(l->bitrev);
343 memset(l,0,sizeof(*l));
344 }
345 }
347 STIN void mdct_bitreverse(mdct_lookup *init,
348 DATA_TYPE *x){
349 int n = init->n;
350 int *bit = init->bitrev;
351 DATA_TYPE *w0 = x;
352 DATA_TYPE *w1 = x = w0+(n>>1);
353 DATA_TYPE *T = init->trig+n;
355 do{
356 DATA_TYPE *x0 = x+bit[0];
357 DATA_TYPE *x1 = x+bit[1];
359 REG_TYPE r0 = x0[1] - x1[1];
360 REG_TYPE r1 = x0[0] + x1[0];
361 REG_TYPE r2 = MULT_NORM(r1 * T[0] + r0 * T[1]);
362 REG_TYPE r3 = MULT_NORM(r1 * T[1] - r0 * T[0]);
364 w1 -= 4;
366 r0 = HALVE(x0[1] + x1[1]);
367 r1 = HALVE(x0[0] - x1[0]);
369 w0[0] = r0 + r2;
370 w1[2] = r0 - r2;
371 w0[1] = r1 + r3;
372 w1[3] = r3 - r1;
374 x0 = x+bit[2];
375 x1 = x+bit[3];
377 r0 = x0[1] - x1[1];
378 r1 = x0[0] + x1[0];
379 r2 = MULT_NORM(r1 * T[2] + r0 * T[3]);
380 r3 = MULT_NORM(r1 * T[3] - r0 * T[2]);
382 r0 = HALVE(x0[1] + x1[1]);
383 r1 = HALVE(x0[0] - x1[0]);
385 w0[2] = r0 + r2;
386 w1[0] = r0 - r2;
387 w0[3] = r1 + r3;
388 w1[1] = r3 - r1;
390 T += 4;
391 bit += 4;
392 w0 += 4;
394 }while(w0<w1);
395 }
397 void mdct_backward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out){
398 int n=init->n;
399 int n2=n>>1;
400 int n4=n>>2;
402 /* rotate */
404 DATA_TYPE *iX = in+n2-7;
405 DATA_TYPE *oX = out+n2+n4;
406 DATA_TYPE *T = init->trig+n4;
408 do{
409 oX -= 4;
410 oX[0] = MULT_NORM(-iX[2] * T[3] - iX[0] * T[2]);
411 oX[1] = MULT_NORM (iX[0] * T[3] - iX[2] * T[2]);
412 oX[2] = MULT_NORM(-iX[6] * T[1] - iX[4] * T[0]);
413 oX[3] = MULT_NORM (iX[4] * T[1] - iX[6] * T[0]);
414 iX -= 8;
415 T += 4;
416 }while(iX>=in);
418 iX = in+n2-8;
419 oX = out+n2+n4;
420 T = init->trig+n4;
422 do{
423 T -= 4;
424 oX[0] = MULT_NORM (iX[4] * T[3] + iX[6] * T[2]);
425 oX[1] = MULT_NORM (iX[4] * T[2] - iX[6] * T[3]);
426 oX[2] = MULT_NORM (iX[0] * T[1] + iX[2] * T[0]);
427 oX[3] = MULT_NORM (iX[0] * T[0] - iX[2] * T[1]);
428 iX -= 8;
429 oX += 4;
430 }while(iX>=in);
432 mdct_butterflies(init,out+n2,n2);
433 mdct_bitreverse(init,out);
435 /* roatate + window */
437 {
438 DATA_TYPE *oX1=out+n2+n4;
439 DATA_TYPE *oX2=out+n2+n4;
440 DATA_TYPE *iX =out;
441 T =init->trig+n2;
443 do{
444 oX1-=4;
446 oX1[3] = MULT_NORM (iX[0] * T[1] - iX[1] * T[0]);
447 oX2[0] = -MULT_NORM (iX[0] * T[0] + iX[1] * T[1]);
449 oX1[2] = MULT_NORM (iX[2] * T[3] - iX[3] * T[2]);
450 oX2[1] = -MULT_NORM (iX[2] * T[2] + iX[3] * T[3]);
452 oX1[1] = MULT_NORM (iX[4] * T[5] - iX[5] * T[4]);
453 oX2[2] = -MULT_NORM (iX[4] * T[4] + iX[5] * T[5]);
455 oX1[0] = MULT_NORM (iX[6] * T[7] - iX[7] * T[6]);
456 oX2[3] = -MULT_NORM (iX[6] * T[6] + iX[7] * T[7]);
458 oX2+=4;
459 iX += 8;
460 T += 8;
461 }while(iX<oX1);
463 iX=out+n2+n4;
464 oX1=out+n4;
465 oX2=oX1;
467 do{
468 oX1-=4;
469 iX-=4;
471 oX2[0] = -(oX1[3] = iX[3]);
472 oX2[1] = -(oX1[2] = iX[2]);
473 oX2[2] = -(oX1[1] = iX[1]);
474 oX2[3] = -(oX1[0] = iX[0]);
476 oX2+=4;
477 }while(oX2<iX);
479 iX=out+n2+n4;
480 oX1=out+n2+n4;
481 oX2=out+n2;
482 do{
483 oX1-=4;
484 oX1[0]= iX[3];
485 oX1[1]= iX[2];
486 oX1[2]= iX[1];
487 oX1[3]= iX[0];
488 iX+=4;
489 }while(oX1>oX2);
490 }
491 }
493 void mdct_forward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out){
494 int n=init->n;
495 int n2=n>>1;
496 int n4=n>>2;
497 int n8=n>>3;
498 DATA_TYPE *w=alloca(n*sizeof(*w)); /* forward needs working space */
499 DATA_TYPE *w2=w+n2;
501 /* rotate */
503 /* window + rotate + step 1 */
505 REG_TYPE r0;
506 REG_TYPE r1;
507 DATA_TYPE *x0=in+n2+n4;
508 DATA_TYPE *x1=x0+1;
509 DATA_TYPE *T=init->trig+n2;
511 int i=0;
513 for(i=0;i<n8;i+=2){
514 x0 -=4;
515 T-=2;
516 r0= x0[2] + x1[0];
517 r1= x0[0] + x1[2];
518 w2[i]= MULT_NORM(r1*T[1] + r0*T[0]);
519 w2[i+1]= MULT_NORM(r1*T[0] - r0*T[1]);
520 x1 +=4;
521 }
523 x1=in+1;
525 for(;i<n2-n8;i+=2){
526 T-=2;
527 x0 -=4;
528 r0= x0[2] - x1[0];
529 r1= x0[0] - x1[2];
530 w2[i]= MULT_NORM(r1*T[1] + r0*T[0]);
531 w2[i+1]= MULT_NORM(r1*T[0] - r0*T[1]);
532 x1 +=4;
533 }
535 x0=in+n;
537 for(;i<n2;i+=2){
538 T-=2;
539 x0 -=4;
540 r0= -x0[2] - x1[0];
541 r1= -x0[0] - x1[2];
542 w2[i]= MULT_NORM(r1*T[1] + r0*T[0]);
543 w2[i+1]= MULT_NORM(r1*T[0] - r0*T[1]);
544 x1 +=4;
545 }
548 mdct_butterflies(init,w+n2,n2);
549 mdct_bitreverse(init,w);
551 /* roatate + window */
553 T=init->trig+n2;
554 x0=out+n2;
556 for(i=0;i<n4;i++){
557 x0--;
558 out[i] =MULT_NORM((w[0]*T[0]+w[1]*T[1])*init->scale);
559 x0[0] =MULT_NORM((w[0]*T[1]-w[1]*T[0])*init->scale);
560 w+=2;
561 T+=2;
562 }
563 }