dbf-halloween2015

diff libs/libjpeg/jfdctflt.c @ 1:c3f5c32cb210

barfed all the libraries in the source tree to make porting easier
author John Tsiombikas <nuclear@member.fsf.org>
date Sun, 01 Nov 2015 00:36:56 +0200
parents
children
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/libs/libjpeg/jfdctflt.c	Sun Nov 01 00:36:56 2015 +0200
     1.3 @@ -0,0 +1,168 @@
     1.4 +/*
     1.5 + * jfdctflt.c
     1.6 + *
     1.7 + * Copyright (C) 1994-1996, Thomas G. Lane.
     1.8 + * This file is part of the Independent JPEG Group's software.
     1.9 + * For conditions of distribution and use, see the accompanying README file.
    1.10 + *
    1.11 + * This file contains a floating-point implementation of the
    1.12 + * forward DCT (Discrete Cosine Transform).
    1.13 + *
    1.14 + * This implementation should be more accurate than either of the integer
    1.15 + * DCT implementations.  However, it may not give the same results on all
    1.16 + * machines because of differences in roundoff behavior.  Speed will depend
    1.17 + * on the hardware's floating point capacity.
    1.18 + *
    1.19 + * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
    1.20 + * on each column.  Direct algorithms are also available, but they are
    1.21 + * much more complex and seem not to be any faster when reduced to code.
    1.22 + *
    1.23 + * This implementation is based on Arai, Agui, and Nakajima's algorithm for
    1.24 + * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
    1.25 + * Japanese, but the algorithm is described in the Pennebaker & Mitchell
    1.26 + * JPEG textbook (see REFERENCES section in file README).  The following code
    1.27 + * is based directly on figure 4-8 in P&M.
    1.28 + * While an 8-point DCT cannot be done in fewer than 11 multiplies, it is
    1.29 + * possible to arrange the computation so that many of the multiplies are
    1.30 + * simple scalings of the final outputs.  These multiplies can then be
    1.31 + * folded into the multiplications or divisions by the JPEG quantization
    1.32 + * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
    1.33 + * to be done in the DCT itself.
    1.34 + * The primary disadvantage of this method is that with a fixed-point
    1.35 + * implementation, accuracy is lost due to imprecise representation of the
    1.36 + * scaled quantization values.  However, that problem does not arise if
    1.37 + * we use floating point arithmetic.
    1.38 + */
    1.39 +
    1.40 +#define JPEG_INTERNALS
    1.41 +#include "jinclude.h"
    1.42 +#include "jpeglib.h"
    1.43 +#include "jdct.h"		/* Private declarations for DCT subsystem */
    1.44 +
    1.45 +#ifdef DCT_FLOAT_SUPPORTED
    1.46 +
    1.47 +
    1.48 +/*
    1.49 + * This module is specialized to the case DCTSIZE = 8.
    1.50 + */
    1.51 +
    1.52 +#if DCTSIZE != 8
    1.53 +  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
    1.54 +#endif
    1.55 +
    1.56 +
    1.57 +/*
    1.58 + * Perform the forward DCT on one block of samples, in place: data is read as DCTSIZE rows of DCTSIZE values, transformed row by row (pass 1) and then column by column (pass 2).
    1.59 + * Each 1-D pass uses 5 multiplies; outputs stay in the scaled form described in the file header, and no descaling happens here. */
    1.60 +
    1.61 +GLOBAL(void)
    1.62 +jpeg_fdct_float (FAST_FLOAT * data) /* data: DCTSIZE*DCTSIZE values, overwritten with the result */
    1.63 +{
    1.64 +  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; /* tmp0..3: sums of mirrored samples; tmp4..7: their differences */
    1.65 +  FAST_FLOAT tmp10, tmp11, tmp12, tmp13; /* second-stage intermediates; tmp10..12 are reused by the odd part */
    1.66 +  FAST_FLOAT z1, z2, z3, z4, z5, z11, z13; /* products with the cosine constants and values derived from them */
    1.67 +  FAST_FLOAT *dataptr; /* start of the current row (pass 1) or top of the current column (pass 2) */
    1.68 +  int ctr; /* counts down from DCTSIZE-1 to 0, one iteration per row/column */
    1.69 +
    1.70 +  /* Pass 1: process rows.  Each row of DCTSIZE contiguous values is transformed and stored back in place. */
    1.71 +
    1.72 +  dataptr = data;
    1.73 +  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
    1.74 +    tmp0 = dataptr[0] + dataptr[7]; /* sum of the mirrored pair (k, 7-k) */
    1.75 +    tmp7 = dataptr[0] - dataptr[7]; /* difference of the same pair */
    1.76 +    tmp1 = dataptr[1] + dataptr[6];
    1.77 +    tmp6 = dataptr[1] - dataptr[6];
    1.78 +    tmp2 = dataptr[2] + dataptr[5];
    1.79 +    tmp5 = dataptr[2] - dataptr[5];
    1.80 +    tmp3 = dataptr[3] + dataptr[4];
    1.81 +    tmp4 = dataptr[3] - dataptr[4];
    1.82 +    
    1.83 +    /* Even part: built only from the sums tmp0..tmp3; writes outputs 0, 4, 2, 6 */
    1.84 +    
    1.85 +    tmp10 = tmp0 + tmp3;	/* phase 2 */
    1.86 +    tmp13 = tmp0 - tmp3;
    1.87 +    tmp11 = tmp1 + tmp2;
    1.88 +    tmp12 = tmp1 - tmp2;
    1.89 +    
    1.90 +    dataptr[0] = tmp10 + tmp11; /* phase 3 */
    1.91 +    dataptr[4] = tmp10 - tmp11;
    1.92 +    
    1.93 +    z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
    1.94 +    dataptr[2] = tmp13 + z1;	/* phase 5 */
    1.95 +    dataptr[6] = tmp13 - z1;
    1.96 +    
    1.97 +    /* Odd part: built only from the differences tmp4..tmp7; writes outputs 5, 3, 1, 7 */
    1.98 +
    1.99 +    tmp10 = tmp4 + tmp5;	/* phase 2 */
   1.100 +    tmp11 = tmp5 + tmp6;
   1.101 +    tmp12 = tmp6 + tmp7;
   1.102 +
   1.103 +    /* The rotator is modified from fig 4-8 to avoid extra negations. */
   1.104 +    z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
   1.105 +    z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
   1.106 +    z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
   1.107 +    z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
   1.108 +
   1.109 +    z11 = tmp7 + z3;		/* phase 5 */
   1.110 +    z13 = tmp7 - z3;
   1.111 +
   1.112 +    dataptr[5] = z13 + z2;	/* phase 6 */
   1.113 +    dataptr[3] = z13 - z2;
   1.114 +    dataptr[1] = z11 + z4;
   1.115 +    dataptr[7] = z11 - z4;
   1.116 +
   1.117 +    dataptr += DCTSIZE;		/* advance pointer to next row */
   1.118 +  }
   1.119 +
   1.120 +  /* Pass 2: process columns.  Same arithmetic as pass 1, applied to the pass-1 output with elements DCTSIZE apart. */
   1.121 +
   1.122 +  dataptr = data;
   1.123 +  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
   1.124 +    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; /* sum of the mirrored pair of rows (k, 7-k) in this column */
   1.125 +    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; /* difference of the same pair */
   1.126 +    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
   1.127 +    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
   1.128 +    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
   1.129 +    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
   1.130 +    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
   1.131 +    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
   1.132 +    
   1.133 +    /* Even part: built only from the sums tmp0..tmp3; writes rows 0, 4, 2, 6 of this column */
   1.134 +    
   1.135 +    tmp10 = tmp0 + tmp3;	/* phase 2 */
   1.136 +    tmp13 = tmp0 - tmp3;
   1.137 +    tmp11 = tmp1 + tmp2;
   1.138 +    tmp12 = tmp1 - tmp2;
   1.139 +    
   1.140 +    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
   1.141 +    dataptr[DCTSIZE*4] = tmp10 - tmp11;
   1.142 +    
   1.143 +    z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
   1.144 +    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
   1.145 +    dataptr[DCTSIZE*6] = tmp13 - z1;
   1.146 +    
   1.147 +    /* Odd part: built only from the differences tmp4..tmp7; writes rows 5, 3, 1, 7 of this column */
   1.148 +
   1.149 +    tmp10 = tmp4 + tmp5;	/* phase 2 */
   1.150 +    tmp11 = tmp5 + tmp6;
   1.151 +    tmp12 = tmp6 + tmp7;
   1.152 +
   1.153 +    /* The rotator is modified from fig 4-8 to avoid extra negations. */
   1.154 +    z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
   1.155 +    z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
   1.156 +    z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
   1.157 +    z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
   1.158 +
   1.159 +    z11 = tmp7 + z3;		/* phase 5 */
   1.160 +    z13 = tmp7 - z3;
   1.161 +
   1.162 +    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
   1.163 +    dataptr[DCTSIZE*3] = z13 - z2;
   1.164 +    dataptr[DCTSIZE*1] = z11 + z4;
   1.165 +    dataptr[DCTSIZE*7] = z11 - z4;
   1.166 +
   1.167 +    dataptr++;			/* advance pointer to next column */
   1.168 +  }
   1.169 +}
   1.170 +
   1.171 +#endif /* DCT_FLOAT_SUPPORTED */