vrshoot

annotate libs/kissfft/kiss_fft.c @ 0:b2f14e535253

initial commit
author John Tsiombikas <nuclear@member.fsf.org>
date Sat, 01 Feb 2014 19:58:19 +0200
parents
children
rev   line source
nuclear@0 1 /*
nuclear@0 2 Copyright (c) 2003-2010, Mark Borgerding
nuclear@0 3
nuclear@0 4 All rights reserved.
nuclear@0 5
nuclear@0 6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
nuclear@0 7
nuclear@0 8 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
nuclear@0 9 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
nuclear@0 10 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
nuclear@0 11
nuclear@0 12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
nuclear@0 13 */
nuclear@0 14
nuclear@0 15
nuclear@0 16 #include "_kiss_fft_guts.h"
nuclear@0 17 /* The guts header contains all the multiplication and addition macros that are defined for
nuclear@0 18 fixed or floating point complex numbers. It also delares the kf_ internal functions.
nuclear@0 19 */
nuclear@0 20
nuclear@0 21 static void kf_bfly2(
nuclear@0 22 kiss_fft_cpx * Fout,
nuclear@0 23 const size_t fstride,
nuclear@0 24 const kiss_fft_cfg st,
nuclear@0 25 int m
nuclear@0 26 )
nuclear@0 27 {
nuclear@0 28 kiss_fft_cpx * Fout2;
nuclear@0 29 kiss_fft_cpx * tw1 = st->twiddles;
nuclear@0 30 kiss_fft_cpx t;
nuclear@0 31 Fout2 = Fout + m;
nuclear@0 32 do{
nuclear@0 33 C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
nuclear@0 34
nuclear@0 35 C_MUL (t, *Fout2 , *tw1);
nuclear@0 36 tw1 += fstride;
nuclear@0 37 C_SUB( *Fout2 , *Fout , t );
nuclear@0 38 C_ADDTO( *Fout , t );
nuclear@0 39 ++Fout2;
nuclear@0 40 ++Fout;
nuclear@0 41 }while (--m);
nuclear@0 42 }
nuclear@0 43
nuclear@0 44 static void kf_bfly4(
nuclear@0 45 kiss_fft_cpx * Fout,
nuclear@0 46 const size_t fstride,
nuclear@0 47 const kiss_fft_cfg st,
nuclear@0 48 const size_t m
nuclear@0 49 )
nuclear@0 50 {
nuclear@0 51 kiss_fft_cpx *tw1,*tw2,*tw3;
nuclear@0 52 kiss_fft_cpx scratch[6];
nuclear@0 53 size_t k=m;
nuclear@0 54 const size_t m2=2*m;
nuclear@0 55 const size_t m3=3*m;
nuclear@0 56
nuclear@0 57
nuclear@0 58 tw3 = tw2 = tw1 = st->twiddles;
nuclear@0 59
nuclear@0 60 do {
nuclear@0 61 C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
nuclear@0 62
nuclear@0 63 C_MUL(scratch[0],Fout[m] , *tw1 );
nuclear@0 64 C_MUL(scratch[1],Fout[m2] , *tw2 );
nuclear@0 65 C_MUL(scratch[2],Fout[m3] , *tw3 );
nuclear@0 66
nuclear@0 67 C_SUB( scratch[5] , *Fout, scratch[1] );
nuclear@0 68 C_ADDTO(*Fout, scratch[1]);
nuclear@0 69 C_ADD( scratch[3] , scratch[0] , scratch[2] );
nuclear@0 70 C_SUB( scratch[4] , scratch[0] , scratch[2] );
nuclear@0 71 C_SUB( Fout[m2], *Fout, scratch[3] );
nuclear@0 72 tw1 += fstride;
nuclear@0 73 tw2 += fstride*2;
nuclear@0 74 tw3 += fstride*3;
nuclear@0 75 C_ADDTO( *Fout , scratch[3] );
nuclear@0 76
nuclear@0 77 if(st->inverse) {
nuclear@0 78 Fout[m].r = scratch[5].r - scratch[4].i;
nuclear@0 79 Fout[m].i = scratch[5].i + scratch[4].r;
nuclear@0 80 Fout[m3].r = scratch[5].r + scratch[4].i;
nuclear@0 81 Fout[m3].i = scratch[5].i - scratch[4].r;
nuclear@0 82 }else{
nuclear@0 83 Fout[m].r = scratch[5].r + scratch[4].i;
nuclear@0 84 Fout[m].i = scratch[5].i - scratch[4].r;
nuclear@0 85 Fout[m3].r = scratch[5].r - scratch[4].i;
nuclear@0 86 Fout[m3].i = scratch[5].i + scratch[4].r;
nuclear@0 87 }
nuclear@0 88 ++Fout;
nuclear@0 89 }while(--k);
nuclear@0 90 }
nuclear@0 91
nuclear@0 92 static void kf_bfly3(
nuclear@0 93 kiss_fft_cpx * Fout,
nuclear@0 94 const size_t fstride,
nuclear@0 95 const kiss_fft_cfg st,
nuclear@0 96 size_t m
nuclear@0 97 )
nuclear@0 98 {
nuclear@0 99 size_t k=m;
nuclear@0 100 const size_t m2 = 2*m;
nuclear@0 101 kiss_fft_cpx *tw1,*tw2;
nuclear@0 102 kiss_fft_cpx scratch[5];
nuclear@0 103 kiss_fft_cpx epi3;
nuclear@0 104 epi3 = st->twiddles[fstride*m];
nuclear@0 105
nuclear@0 106 tw1=tw2=st->twiddles;
nuclear@0 107
nuclear@0 108 do{
nuclear@0 109 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
nuclear@0 110
nuclear@0 111 C_MUL(scratch[1],Fout[m] , *tw1);
nuclear@0 112 C_MUL(scratch[2],Fout[m2] , *tw2);
nuclear@0 113
nuclear@0 114 C_ADD(scratch[3],scratch[1],scratch[2]);
nuclear@0 115 C_SUB(scratch[0],scratch[1],scratch[2]);
nuclear@0 116 tw1 += fstride;
nuclear@0 117 tw2 += fstride*2;
nuclear@0 118
nuclear@0 119 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
nuclear@0 120 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
nuclear@0 121
nuclear@0 122 C_MULBYSCALAR( scratch[0] , epi3.i );
nuclear@0 123
nuclear@0 124 C_ADDTO(*Fout,scratch[3]);
nuclear@0 125
nuclear@0 126 Fout[m2].r = Fout[m].r + scratch[0].i;
nuclear@0 127 Fout[m2].i = Fout[m].i - scratch[0].r;
nuclear@0 128
nuclear@0 129 Fout[m].r -= scratch[0].i;
nuclear@0 130 Fout[m].i += scratch[0].r;
nuclear@0 131
nuclear@0 132 ++Fout;
nuclear@0 133 }while(--k);
nuclear@0 134 }
nuclear@0 135
nuclear@0 136 static void kf_bfly5(
nuclear@0 137 kiss_fft_cpx * Fout,
nuclear@0 138 const size_t fstride,
nuclear@0 139 const kiss_fft_cfg st,
nuclear@0 140 int m
nuclear@0 141 )
nuclear@0 142 {
nuclear@0 143 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
nuclear@0 144 int u;
nuclear@0 145 kiss_fft_cpx scratch[13];
nuclear@0 146 kiss_fft_cpx * twiddles = st->twiddles;
nuclear@0 147 kiss_fft_cpx *tw;
nuclear@0 148 kiss_fft_cpx ya,yb;
nuclear@0 149 ya = twiddles[fstride*m];
nuclear@0 150 yb = twiddles[fstride*2*m];
nuclear@0 151
nuclear@0 152 Fout0=Fout;
nuclear@0 153 Fout1=Fout0+m;
nuclear@0 154 Fout2=Fout0+2*m;
nuclear@0 155 Fout3=Fout0+3*m;
nuclear@0 156 Fout4=Fout0+4*m;
nuclear@0 157
nuclear@0 158 tw=st->twiddles;
nuclear@0 159 for ( u=0; u<m; ++u ) {
nuclear@0 160 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
nuclear@0 161 scratch[0] = *Fout0;
nuclear@0 162
nuclear@0 163 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
nuclear@0 164 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
nuclear@0 165 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
nuclear@0 166 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
nuclear@0 167
nuclear@0 168 C_ADD( scratch[7],scratch[1],scratch[4]);
nuclear@0 169 C_SUB( scratch[10],scratch[1],scratch[4]);
nuclear@0 170 C_ADD( scratch[8],scratch[2],scratch[3]);
nuclear@0 171 C_SUB( scratch[9],scratch[2],scratch[3]);
nuclear@0 172
nuclear@0 173 Fout0->r += scratch[7].r + scratch[8].r;
nuclear@0 174 Fout0->i += scratch[7].i + scratch[8].i;
nuclear@0 175
nuclear@0 176 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
nuclear@0 177 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
nuclear@0 178
nuclear@0 179 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
nuclear@0 180 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
nuclear@0 181
nuclear@0 182 C_SUB(*Fout1,scratch[5],scratch[6]);
nuclear@0 183 C_ADD(*Fout4,scratch[5],scratch[6]);
nuclear@0 184
nuclear@0 185 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
nuclear@0 186 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
nuclear@0 187 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
nuclear@0 188 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
nuclear@0 189
nuclear@0 190 C_ADD(*Fout2,scratch[11],scratch[12]);
nuclear@0 191 C_SUB(*Fout3,scratch[11],scratch[12]);
nuclear@0 192
nuclear@0 193 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
nuclear@0 194 }
nuclear@0 195 }
nuclear@0 196
nuclear@0 197 /* perform the butterfly for one stage of a mixed radix FFT */
nuclear@0 198 static void kf_bfly_generic(
nuclear@0 199 kiss_fft_cpx * Fout,
nuclear@0 200 const size_t fstride,
nuclear@0 201 const kiss_fft_cfg st,
nuclear@0 202 int m,
nuclear@0 203 int p
nuclear@0 204 )
nuclear@0 205 {
nuclear@0 206 int u,k,q1,q;
nuclear@0 207 kiss_fft_cpx * twiddles = st->twiddles;
nuclear@0 208 kiss_fft_cpx t;
nuclear@0 209 int Norig = st->nfft;
nuclear@0 210
nuclear@0 211 kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
nuclear@0 212
nuclear@0 213 for ( u=0; u<m; ++u ) {
nuclear@0 214 k=u;
nuclear@0 215 for ( q1=0 ; q1<p ; ++q1 ) {
nuclear@0 216 scratch[q1] = Fout[ k ];
nuclear@0 217 C_FIXDIV(scratch[q1],p);
nuclear@0 218 k += m;
nuclear@0 219 }
nuclear@0 220
nuclear@0 221 k=u;
nuclear@0 222 for ( q1=0 ; q1<p ; ++q1 ) {
nuclear@0 223 int twidx=0;
nuclear@0 224 Fout[ k ] = scratch[0];
nuclear@0 225 for (q=1;q<p;++q ) {
nuclear@0 226 twidx += fstride * k;
nuclear@0 227 if (twidx>=Norig) twidx-=Norig;
nuclear@0 228 C_MUL(t,scratch[q] , twiddles[twidx] );
nuclear@0 229 C_ADDTO( Fout[ k ] ,t);
nuclear@0 230 }
nuclear@0 231 k += m;
nuclear@0 232 }
nuclear@0 233 }
nuclear@0 234 KISS_FFT_TMP_FREE(scratch);
nuclear@0 235 }
nuclear@0 236
nuclear@0 237 static
nuclear@0 238 void kf_work(
nuclear@0 239 kiss_fft_cpx * Fout,
nuclear@0 240 const kiss_fft_cpx * f,
nuclear@0 241 const size_t fstride,
nuclear@0 242 int in_stride,
nuclear@0 243 int * factors,
nuclear@0 244 const kiss_fft_cfg st
nuclear@0 245 )
nuclear@0 246 {
nuclear@0 247 kiss_fft_cpx * Fout_beg=Fout;
nuclear@0 248 const int p=*factors++; /* the radix */
nuclear@0 249 const int m=*factors++; /* stage's fft length/p */
nuclear@0 250 const kiss_fft_cpx * Fout_end = Fout + p*m;
nuclear@0 251
nuclear@0 252 #ifdef _OPENMP
nuclear@0 253 /* use openmp extensions at the
nuclear@0 254 * top-level (not recursive)
nuclear@0 255 */
nuclear@0 256 if (fstride==1 && p<=5)
nuclear@0 257 {
nuclear@0 258 int k;
nuclear@0 259
nuclear@0 260 /* execute the p different work units in different threads */
nuclear@0 261 # pragma omp parallel for
nuclear@0 262 for (k=0;k<p;++k)
nuclear@0 263 kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
nuclear@0 264 /* all threads have joined by this point */
nuclear@0 265
nuclear@0 266 switch (p) {
nuclear@0 267 case 2: kf_bfly2(Fout,fstride,st,m); break;
nuclear@0 268 case 3: kf_bfly3(Fout,fstride,st,m); break;
nuclear@0 269 case 4: kf_bfly4(Fout,fstride,st,m); break;
nuclear@0 270 case 5: kf_bfly5(Fout,fstride,st,m); break;
nuclear@0 271 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
nuclear@0 272 }
nuclear@0 273 return;
nuclear@0 274 }
nuclear@0 275 #endif
nuclear@0 276
nuclear@0 277 if (m==1) {
nuclear@0 278 do{
nuclear@0 279 *Fout = *f;
nuclear@0 280 f += fstride*in_stride;
nuclear@0 281 }while(++Fout != Fout_end );
nuclear@0 282 }else{
nuclear@0 283 do{
nuclear@0 284 /* recursive call:
nuclear@0 285 * DFT of size m*p performed by doing
nuclear@0 286 * p instances of smaller DFTs of size m,
nuclear@0 287 * each one takes a decimated version of the input
nuclear@0 288 */
nuclear@0 289 kf_work( Fout , f, fstride*p, in_stride, factors,st);
nuclear@0 290 f += fstride*in_stride;
nuclear@0 291 }while( (Fout += m) != Fout_end );
nuclear@0 292 }
nuclear@0 293
nuclear@0 294 Fout=Fout_beg;
nuclear@0 295
nuclear@0 296 /* recombine the p smaller DFTs */
nuclear@0 297 switch (p) {
nuclear@0 298 case 2: kf_bfly2(Fout,fstride,st,m); break;
nuclear@0 299 case 3: kf_bfly3(Fout,fstride,st,m); break;
nuclear@0 300 case 4: kf_bfly4(Fout,fstride,st,m); break;
nuclear@0 301 case 5: kf_bfly5(Fout,fstride,st,m); break;
nuclear@0 302 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
nuclear@0 303 }
nuclear@0 304 }
nuclear@0 305
nuclear@0 306 /* facbuf is populated by p1,m1,p2,m2, ...
nuclear@0 307 where
nuclear@0 308 p[i] * m[i] = m[i-1]
nuclear@0 309 m0 = n */
nuclear@0 310 static
nuclear@0 311 void kf_factor(int n,int * facbuf)
nuclear@0 312 {
nuclear@0 313 int p=4;
nuclear@0 314 double floor_sqrt;
nuclear@0 315 floor_sqrt = floor( sqrt((double)n) );
nuclear@0 316
nuclear@0 317 /*factor out powers of 4, powers of 2, then any remaining primes */
nuclear@0 318 do {
nuclear@0 319 while (n % p) {
nuclear@0 320 switch (p) {
nuclear@0 321 case 4: p = 2; break;
nuclear@0 322 case 2: p = 3; break;
nuclear@0 323 default: p += 2; break;
nuclear@0 324 }
nuclear@0 325 if (p > floor_sqrt)
nuclear@0 326 p = n; /* no more factors, skip to end */
nuclear@0 327 }
nuclear@0 328 n /= p;
nuclear@0 329 *facbuf++ = p;
nuclear@0 330 *facbuf++ = n;
nuclear@0 331 } while (n > 1);
nuclear@0 332 }
nuclear@0 333
nuclear@0 334 /*
nuclear@0 335 *
nuclear@0 336 * User-callable function to allocate all necessary storage space for the fft.
nuclear@0 337 *
nuclear@0 338 * The return value is a contiguous block of memory, allocated with malloc. As such,
nuclear@0 339 * It can be freed with free(), rather than a kiss_fft-specific function.
nuclear@0 340 * */
nuclear@0 341 kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
nuclear@0 342 {
nuclear@0 343 kiss_fft_cfg st=NULL;
nuclear@0 344 size_t memneeded = sizeof(struct kiss_fft_state)
nuclear@0 345 + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
nuclear@0 346
nuclear@0 347 if ( lenmem==NULL ) {
nuclear@0 348 st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
nuclear@0 349 }else{
nuclear@0 350 if (mem != NULL && *lenmem >= memneeded)
nuclear@0 351 st = (kiss_fft_cfg)mem;
nuclear@0 352 *lenmem = memneeded;
nuclear@0 353 }
nuclear@0 354 if (st) {
nuclear@0 355 int i;
nuclear@0 356 st->nfft=nfft;
nuclear@0 357 st->inverse = inverse_fft;
nuclear@0 358
nuclear@0 359 for (i=0;i<nfft;++i) {
nuclear@0 360 const double pi=3.141592653589793238462643383279502884197169399375105820974944;
nuclear@0 361 double phase = -2*pi*i / nfft;
nuclear@0 362 if (st->inverse)
nuclear@0 363 phase *= -1;
nuclear@0 364 kf_cexp(st->twiddles+i, phase );
nuclear@0 365 }
nuclear@0 366
nuclear@0 367 kf_factor(nfft,st->factors);
nuclear@0 368 }
nuclear@0 369 return st;
nuclear@0 370 }
nuclear@0 371
nuclear@0 372
nuclear@0 373 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
nuclear@0 374 {
nuclear@0 375 if (fin == fout) {
nuclear@0 376 /* NOTE: this is not really an in-place FFT algorithm.
nuclear@0 377 * It just performs an out-of-place FFT into a temp buffer
nuclear@0 378 */
nuclear@0 379 kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
nuclear@0 380 kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
nuclear@0 381 memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
nuclear@0 382 KISS_FFT_TMP_FREE(tmpbuf);
nuclear@0 383 }else{
nuclear@0 384 kf_work( fout, fin, 1,in_stride, st->factors,st );
nuclear@0 385 }
nuclear@0 386 }
nuclear@0 387
nuclear@0 388 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
nuclear@0 389 {
nuclear@0 390 kiss_fft_stride(cfg,fin,fout,1);
nuclear@0 391 }
nuclear@0 392
nuclear@0 393
nuclear@0 394 void kiss_fft_cleanup(void)
nuclear@0 395 {
nuclear@0 396 /* nothing needed any more */
nuclear@0 397 }
nuclear@0 398
nuclear@0 399 int kiss_fft_next_fast_size(int n)
nuclear@0 400 {
nuclear@0 401 while(1) {
nuclear@0 402 int m=n;
nuclear@0 403 while ( (m%2) == 0 ) m/=2;
nuclear@0 404 while ( (m%3) == 0 ) m/=3;
nuclear@0 405 while ( (m%5) == 0 ) m/=5;
nuclear@0 406 if (m<=1)
nuclear@0 407 break; /* n is completely factorable by twos, threes, and fives */
nuclear@0 408 n++;
nuclear@0 409 }
nuclear@0 410 return n;
nuclear@0 411 }