vrshoot
diff libs/libjpeg/jfdctflt.c @ 0:b2f14e535253
initial commit
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Sat, 01 Feb 2014 19:58:19 +0200 |
parents | |
children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/libs/libjpeg/jfdctflt.c Sat Feb 01 19:58:19 2014 +0200 1.3 @@ -0,0 +1,168 @@ 1.4 +/* 1.5 + * jfdctflt.c 1.6 + * 1.7 + * Copyright (C) 1994-1996, Thomas G. Lane. 1.8 + * This file is part of the Independent JPEG Group's software. 1.9 + * For conditions of distribution and use, see the accompanying README file. 1.10 + * 1.11 + * This file contains a floating-point implementation of the 1.12 + * forward DCT (Discrete Cosine Transform). 1.13 + * 1.14 + * This implementation should be more accurate than either of the integer 1.15 + * DCT implementations. However, it may not give the same results on all 1.16 + * machines because of differences in roundoff behavior. Speed will depend 1.17 + * on the hardware's floating point capacity. 1.18 + * 1.19 + * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT 1.20 + * on each column. Direct algorithms are also available, but they are 1.21 + * much more complex and seem not to be any faster when reduced to code. 1.22 + * 1.23 + * This implementation is based on Arai, Agui, and Nakajima's algorithm for 1.24 + * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in 1.25 + * Japanese, but the algorithm is described in the Pennebaker & Mitchell 1.26 + * JPEG textbook (see REFERENCES section in file README). The following code 1.27 + * is based directly on figure 4-8 in P&M. 1.28 + * While an 8-point DCT cannot be done in less than 11 multiplies, it is 1.29 + * possible to arrange the computation so that many of the multiplies are 1.30 + * simple scalings of the final outputs. These multiplies can then be 1.31 + * folded into the multiplications or divisions by the JPEG quantization 1.32 + * table entries. The AA&N method leaves only 5 multiplies and 29 adds 1.33 + * to be done in the DCT itself. 1.34 + * The primary disadvantage of this method is that with a fixed-point 1.35 + * implementation, accuracy is lost due to imprecise representation of the 1.36 + * scaled quantization values. However, that problem does not arise if 1.37 + * we use floating point arithmetic. 1.38 + */ 1.39 + 1.40 +#define JPEG_INTERNALS 1.41 +#include "jinclude.h" 1.42 +#include "jpeglib.h" 1.43 +#include "jdct.h" /* Private declarations for DCT subsystem */ 1.44 + 1.45 +#ifdef DCT_FLOAT_SUPPORTED 1.46 + 1.47 + 1.48 +/* 1.49 + * This module is specialized to the case DCTSIZE = 8. 1.50 + */ 1.51 + 1.52 +#if DCTSIZE != 8 1.53 + Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ 1.54 +#endif 1.55 + 1.56 + 1.57 +/* 1.58 + * Perform the forward DCT on one block of samples. 1.59 + */ 1.60 + 1.61 +GLOBAL(void) 1.62 +jpeg_fdct_float (FAST_FLOAT * data) 1.63 +{ 1.64 + FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; 1.65 + FAST_FLOAT tmp10, tmp11, tmp12, tmp13; 1.66 + FAST_FLOAT z1, z2, z3, z4, z5, z11, z13; 1.67 + FAST_FLOAT *dataptr; 1.68 + int ctr; 1.69 + 1.70 + /* Pass 1: process rows. */ 1.71 + 1.72 + dataptr = data; 1.73 + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 1.74 + tmp0 = dataptr[0] + dataptr[7]; 1.75 + tmp7 = dataptr[0] - dataptr[7]; 1.76 + tmp1 = dataptr[1] + dataptr[6]; 1.77 + tmp6 = dataptr[1] - dataptr[6]; 1.78 + tmp2 = dataptr[2] + dataptr[5]; 1.79 + tmp5 = dataptr[2] - dataptr[5]; 1.80 + tmp3 = dataptr[3] + dataptr[4]; 1.81 + tmp4 = dataptr[3] - dataptr[4]; 1.82 + 1.83 + /* Even part */ 1.84 + 1.85 + tmp10 = tmp0 + tmp3; /* phase 2 */ 1.86 + tmp13 = tmp0 - tmp3; 1.87 + tmp11 = tmp1 + tmp2; 1.88 + tmp12 = tmp1 - tmp2; 1.89 + 1.90 + dataptr[0] = tmp10 + tmp11; /* phase 3 */ 1.91 + dataptr[4] = tmp10 - tmp11; 1.92 + 1.93 + z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */ 1.94 + dataptr[2] = tmp13 + z1; /* phase 5 */ 1.95 + dataptr[6] = tmp13 - z1; 1.96 + 1.97 + /* Odd part */ 1.98 + 1.99 + tmp10 = tmp4 + tmp5; /* phase 2 */ 1.100 + tmp11 = tmp5 + tmp6; 1.101 + tmp12 = tmp6 + tmp7; 1.102 + 1.103 + /* The rotator is modified from fig 4-8 to avoid extra negations. */ 1.104 + z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */ 1.105 + z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */ 1.106 + z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */ 1.107 + z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */ 1.108 + 1.109 + z11 = tmp7 + z3; /* phase 5 */ 1.110 + z13 = tmp7 - z3; 1.111 + 1.112 + dataptr[5] = z13 + z2; /* phase 6 */ 1.113 + dataptr[3] = z13 - z2; 1.114 + dataptr[1] = z11 + z4; 1.115 + dataptr[7] = z11 - z4; 1.116 + 1.117 + dataptr += DCTSIZE; /* advance pointer to next row */ 1.118 + } 1.119 + 1.120 + /* Pass 2: process columns. */ 1.121 + 1.122 + dataptr = data; 1.123 + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 1.124 + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; 1.125 + tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; 1.126 + tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; 1.127 + tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; 1.128 + tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; 1.129 + tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; 1.130 + tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; 1.131 + tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; 1.132 + 1.133 + /* Even part */ 1.134 + 1.135 + tmp10 = tmp0 + tmp3; /* phase 2 */ 1.136 + tmp13 = tmp0 - tmp3; 1.137 + tmp11 = tmp1 + tmp2; 1.138 + tmp12 = tmp1 - tmp2; 1.139 + 1.140 + dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */ 1.141 + dataptr[DCTSIZE*4] = tmp10 - tmp11; 1.142 + 1.143 + z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */ 1.144 + dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */ 1.145 + dataptr[DCTSIZE*6] = tmp13 - z1; 1.146 + 1.147 + /* Odd part */ 1.148 + 1.149 + tmp10 = tmp4 + tmp5; /* phase 2 */ 1.150 + tmp11 = tmp5 + tmp6; 1.151 + tmp12 = tmp6 + tmp7; 1.152 + 1.153 + /* The rotator is modified from fig 4-8 to avoid extra negations. */ 1.154 + z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */ 1.155 + z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */ 1.156 + z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */ 1.157 + z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */ 1.158 + 1.159 + z11 = tmp7 + z3; /* phase 5 */ 1.160 + z13 = tmp7 - z3; 1.161 + 1.162 + dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */ 1.163 + dataptr[DCTSIZE*3] = z13 - z2; 1.164 + dataptr[DCTSIZE*1] = z11 + z4; 1.165 + dataptr[DCTSIZE*7] = z11 - z4; 1.166 + 1.167 + dataptr++; /* advance pointer to next column */ 1.168 + } 1.169 +} 1.170 + 1.171 +#endif /* DCT_FLOAT_SUPPORTED */