vrshoot
diff libs/libjpeg/jidctred.c @ 0:b2f14e535253
initial commit
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Sat, 01 Feb 2014 19:58:19 +0200 |
parents | |
children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/libs/libjpeg/jidctred.c Sat Feb 01 19:58:19 2014 +0200 1.3 @@ -0,0 +1,398 @@ 1.4 +/* 1.5 + * jidctred.c 1.6 + * 1.7 + * Copyright (C) 1994-1998, Thomas G. Lane. 1.8 + * This file is part of the Independent JPEG Group's software. 1.9 + * For conditions of distribution and use, see the accompanying README file. 1.10 + * 1.11 + * This file contains inverse-DCT routines that produce reduced-size output: 1.12 + * either 4x4, 2x2, or 1x1 pixels from an 8x8 DCT block. 1.13 + * 1.14 + * The implementation is based on the Loeffler, Ligtenberg and Moschytz (LL&M) 1.15 + * algorithm used in jidctint.c. We simply replace each 8-to-8 1-D IDCT step 1.16 + * with an 8-to-4 step that produces the four averages of two adjacent outputs 1.17 + * (or an 8-to-2 step producing two averages of four outputs, for 2x2 output). 1.18 + * These steps were derived by computing the corresponding values at the end 1.19 + * of the normal LL&M code, then simplifying as much as possible. 1.20 + * 1.21 + * 1x1 is trivial: just take the DC coefficient divided by 8. 1.22 + * 1.23 + * See jidctint.c for additional comments. 1.24 + */ 1.25 + 1.26 +#define JPEG_INTERNALS 1.27 +#include "jinclude.h" 1.28 +#include "jpeglib.h" 1.29 +#include "jdct.h" /* Private declarations for DCT subsystem */ 1.30 + 1.31 +#ifdef IDCT_SCALING_SUPPORTED 1.32 + 1.33 + 1.34 +/* 1.35 + * This module is specialized to the case DCTSIZE = 8. 1.36 + */ 1.37 + 1.38 +#if DCTSIZE != 8 1.39 + Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ 1.40 +#endif 1.41 + 1.42 + 1.43 +/* Scaling is the same as in jidctint.c. */ 1.44 + 1.45 +#if BITS_IN_JSAMPLE == 8 1.46 +#define CONST_BITS 13 1.47 +#define PASS1_BITS 2 1.48 +#else 1.49 +#define CONST_BITS 13 1.50 +#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ 1.51 +#endif 1.52 + 1.53 +/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus 1.54 + * causing a lot of useless floating-point operations at run time. 1.55 + * To get around this we use the following pre-calculated constants. 1.56 + * If you change CONST_BITS you may want to add appropriate values. 1.57 + * (With a reasonable C compiler, you can just rely on the FIX() macro...) 1.58 + */ 1.59 + 1.60 +#if CONST_BITS == 13 1.61 +#define FIX_0_211164243 ((INT32) 1730) /* FIX(0.211164243) */ 1.62 +#define FIX_0_509795579 ((INT32) 4176) /* FIX(0.509795579) */ 1.63 +#define FIX_0_601344887 ((INT32) 4926) /* FIX(0.601344887) */ 1.64 +#define FIX_0_720959822 ((INT32) 5906) /* FIX(0.720959822) */ 1.65 +#define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */ 1.66 +#define FIX_0_850430095 ((INT32) 6967) /* FIX(0.850430095) */ 1.67 +#define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */ 1.68 +#define FIX_1_061594337 ((INT32) 8697) /* FIX(1.061594337) */ 1.69 +#define FIX_1_272758580 ((INT32) 10426) /* FIX(1.272758580) */ 1.70 +#define FIX_1_451774981 ((INT32) 11893) /* FIX(1.451774981) */ 1.71 +#define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */ 1.72 +#define FIX_2_172734803 ((INT32) 17799) /* FIX(2.172734803) */ 1.73 +#define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */ 1.74 +#define FIX_3_624509785 ((INT32) 29692) /* FIX(3.624509785) */ 1.75 +#else 1.76 +#define FIX_0_211164243 FIX(0.211164243) 1.77 +#define FIX_0_509795579 FIX(0.509795579) 1.78 +#define FIX_0_601344887 FIX(0.601344887) 1.79 +#define FIX_0_720959822 FIX(0.720959822) 1.80 +#define FIX_0_765366865 FIX(0.765366865) 1.81 +#define FIX_0_850430095 FIX(0.850430095) 1.82 +#define FIX_0_899976223 FIX(0.899976223) 1.83 +#define FIX_1_061594337 FIX(1.061594337) 1.84 +#define FIX_1_272758580 FIX(1.272758580) 1.85 +#define FIX_1_451774981 FIX(1.451774981) 1.86 +#define FIX_1_847759065 FIX(1.847759065) 1.87 +#define FIX_2_172734803 FIX(2.172734803) 1.88 +#define FIX_2_562915447 FIX(2.562915447) 1.89 +#define FIX_3_624509785 FIX(3.624509785) 1.90 +#endif 1.91 + 1.92 + 1.93 +/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. 1.94 + * For 8-bit samples with the recommended scaling, all the variable 1.95 + * and constant values involved are no more than 16 bits wide, so a 1.96 + * 16x16->32 bit multiply can be used instead of a full 32x32 multiply. 1.97 + * For 12-bit samples, a full 32-bit multiplication will be needed. 1.98 + */ 1.99 + 1.100 +#if BITS_IN_JSAMPLE == 8 1.101 +#define MULTIPLY(var,const) MULTIPLY16C16(var,const) 1.102 +#else 1.103 +#define MULTIPLY(var,const) ((var) * (const)) 1.104 +#endif 1.105 + 1.106 + 1.107 +/* Dequantize a coefficient by multiplying it by the multiplier-table 1.108 + * entry; produce an int result. In this module, both inputs and result 1.109 + * are 16 bits or less, so either int or short multiply will work. 1.110 + */ 1.111 + 1.112 +#define DEQUANTIZE(coef,quantval) (((ISLOW_MULT_TYPE) (coef)) * (quantval)) 1.113 + 1.114 + 1.115 +/* 1.116 + * Perform dequantization and inverse DCT on one block of coefficients, 1.117 + * producing a reduced-size 4x4 output block. 1.118 + */ 1.119 + 1.120 +GLOBAL(void) 1.121 +jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1.122 + JCOEFPTR coef_block, 1.123 + JSAMPARRAY output_buf, JDIMENSION output_col) 1.124 +{ 1.125 + INT32 tmp0, tmp2, tmp10, tmp12; 1.126 + INT32 z1, z2, z3, z4; 1.127 + JCOEFPTR inptr; 1.128 + ISLOW_MULT_TYPE * quantptr; 1.129 + int * wsptr; 1.130 + JSAMPROW outptr; 1.131 + JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1.132 + int ctr; 1.133 + int workspace[DCTSIZE*4]; /* buffers data between passes */ 1.134 + SHIFT_TEMPS 1.135 + 1.136 + /* Pass 1: process columns from input, store into work array. */ 1.137 + 1.138 + inptr = coef_block; 1.139 + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1.140 + wsptr = workspace; 1.141 + for (ctr = DCTSIZE; ctr > 0; inptr++, quantptr++, wsptr++, ctr--) { 1.142 + /* Don't bother to process column 4, because second pass won't use it */ 1.143 + if (ctr == DCTSIZE-4) 1.144 + continue; 1.145 + if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && 1.146 + inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*5] == 0 && 1.147 + inptr[DCTSIZE*6] == 0 && inptr[DCTSIZE*7] == 0) { 1.148 + /* AC terms all zero; we need not examine term 4 for 4x4 output */ 1.149 + int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; 1.150 + 1.151 + wsptr[DCTSIZE*0] = dcval; 1.152 + wsptr[DCTSIZE*1] = dcval; 1.153 + wsptr[DCTSIZE*2] = dcval; 1.154 + wsptr[DCTSIZE*3] = dcval; 1.155 + 1.156 + continue; 1.157 + } 1.158 + 1.159 + /* Even part */ 1.160 + 1.161 + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1.162 + tmp0 <<= (CONST_BITS+1); 1.163 + 1.164 + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1.165 + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 1.166 + 1.167 + tmp2 = MULTIPLY(z2, FIX_1_847759065) + MULTIPLY(z3, - FIX_0_765366865); 1.168 + 1.169 + tmp10 = tmp0 + tmp2; 1.170 + tmp12 = tmp0 - tmp2; 1.171 + 1.172 + /* Odd part */ 1.173 + 1.174 + z1 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 1.175 + z2 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1.176 + z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1.177 + z4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1.178 + 1.179 + tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */ 1.180 + + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */ 1.181 + + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */ 1.182 + + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */ 1.183 + 1.184 + tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */ 1.185 + + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */ 1.186 + + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */ 1.187 + + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */ 1.188 + 1.189 + /* Final output stage */ 1.190 + 1.191 + wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp2, CONST_BITS-PASS1_BITS+1); 1.192 + wsptr[DCTSIZE*3] = (int) DESCALE(tmp10 - tmp2, CONST_BITS-PASS1_BITS+1); 1.193 + wsptr[DCTSIZE*1] = (int) DESCALE(tmp12 + tmp0, CONST_BITS-PASS1_BITS+1); 1.194 + wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 - tmp0, CONST_BITS-PASS1_BITS+1); 1.195 + } 1.196 + 1.197 + /* Pass 2: process 4 rows from work array, store into output array. */ 1.198 + 1.199 + wsptr = workspace; 1.200 + for (ctr = 0; ctr < 4; ctr++) { 1.201 + outptr = output_buf[ctr] + output_col; 1.202 + /* It's not clear whether a zero row test is worthwhile here ... */ 1.203 + 1.204 +#ifndef NO_ZERO_ROW_TEST 1.205 + if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && 1.206 + wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { 1.207 + /* AC terms all zero */ 1.208 + JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3) 1.209 + & RANGE_MASK]; 1.210 + 1.211 + outptr[0] = dcval; 1.212 + outptr[1] = dcval; 1.213 + outptr[2] = dcval; 1.214 + outptr[3] = dcval; 1.215 + 1.216 + wsptr += DCTSIZE; /* advance pointer to next row */ 1.217 + continue; 1.218 + } 1.219 +#endif 1.220 + 1.221 + /* Even part */ 1.222 + 1.223 + tmp0 = ((INT32) wsptr[0]) << (CONST_BITS+1); 1.224 + 1.225 + tmp2 = MULTIPLY((INT32) wsptr[2], FIX_1_847759065) 1.226 + + MULTIPLY((INT32) wsptr[6], - FIX_0_765366865); 1.227 + 1.228 + tmp10 = tmp0 + tmp2; 1.229 + tmp12 = tmp0 - tmp2; 1.230 + 1.231 + /* Odd part */ 1.232 + 1.233 + z1 = (INT32) wsptr[7]; 1.234 + z2 = (INT32) wsptr[5]; 1.235 + z3 = (INT32) wsptr[3]; 1.236 + z4 = (INT32) wsptr[1]; 1.237 + 1.238 + tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */ 1.239 + + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */ 1.240 + + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */ 1.241 + + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */ 1.242 + 1.243 + tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */ 1.244 + + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */ 1.245 + + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */ 1.246 + + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */ 1.247 + 1.248 + /* Final output stage */ 1.249 + 1.250 + outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp2, 1.251 + CONST_BITS+PASS1_BITS+3+1) 1.252 + & RANGE_MASK]; 1.253 + outptr[3] = range_limit[(int) DESCALE(tmp10 - tmp2, 1.254 + CONST_BITS+PASS1_BITS+3+1) 1.255 + & RANGE_MASK]; 1.256 + outptr[1] = range_limit[(int) DESCALE(tmp12 + tmp0, 1.257 + CONST_BITS+PASS1_BITS+3+1) 1.258 + & RANGE_MASK]; 1.259 + outptr[2] = range_limit[(int) DESCALE(tmp12 - tmp0, 1.260 + CONST_BITS+PASS1_BITS+3+1) 1.261 + & RANGE_MASK]; 1.262 + 1.263 + wsptr += DCTSIZE; /* advance pointer to next row */ 1.264 + } 1.265 +} 1.266 + 1.267 + 1.268 +/* 1.269 + * Perform dequantization and inverse DCT on one block of coefficients, 1.270 + * producing a reduced-size 2x2 output block. 1.271 + */ 1.272 + 1.273 +GLOBAL(void) 1.274 +jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1.275 + JCOEFPTR coef_block, 1.276 + JSAMPARRAY output_buf, JDIMENSION output_col) 1.277 +{ 1.278 + INT32 tmp0, tmp10, z1; 1.279 + JCOEFPTR inptr; 1.280 + ISLOW_MULT_TYPE * quantptr; 1.281 + int * wsptr; 1.282 + JSAMPROW outptr; 1.283 + JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1.284 + int ctr; 1.285 + int workspace[DCTSIZE*2]; /* buffers data between passes */ 1.286 + SHIFT_TEMPS 1.287 + 1.288 + /* Pass 1: process columns from input, store into work array. */ 1.289 + 1.290 + inptr = coef_block; 1.291 + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1.292 + wsptr = workspace; 1.293 + for (ctr = DCTSIZE; ctr > 0; inptr++, quantptr++, wsptr++, ctr--) { 1.294 + /* Don't bother to process columns 2,4,6 */ 1.295 + if (ctr == DCTSIZE-2 || ctr == DCTSIZE-4 || ctr == DCTSIZE-6) 1.296 + continue; 1.297 + if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*3] == 0 && 1.298 + inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*7] == 0) { 1.299 + /* AC terms all zero; we need not examine terms 2,4,6 for 2x2 output */ 1.300 + int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; 1.301 + 1.302 + wsptr[DCTSIZE*0] = dcval; 1.303 + wsptr[DCTSIZE*1] = dcval; 1.304 + 1.305 + continue; 1.306 + } 1.307 + 1.308 + /* Even part */ 1.309 + 1.310 + z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1.311 + tmp10 = z1 << (CONST_BITS+2); 1.312 + 1.313 + /* Odd part */ 1.314 + 1.315 + z1 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 1.316 + tmp0 = MULTIPLY(z1, - FIX_0_720959822); /* sqrt(2) * (c7-c5+c3-c1) */ 1.317 + z1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1.318 + tmp0 += MULTIPLY(z1, FIX_0_850430095); /* sqrt(2) * (-c1+c3+c5+c7) */ 1.319 + z1 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1.320 + tmp0 += MULTIPLY(z1, - FIX_1_272758580); /* sqrt(2) * (-c1+c3-c5-c7) */ 1.321 + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1.322 + tmp0 += MULTIPLY(z1, FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */ 1.323 + 1.324 + /* Final output stage */ 1.325 + 1.326 + wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp0, CONST_BITS-PASS1_BITS+2); 1.327 + wsptr[DCTSIZE*1] = (int) DESCALE(tmp10 - tmp0, CONST_BITS-PASS1_BITS+2); 1.328 + } 1.329 + 1.330 + /* Pass 2: process 2 rows from work array, store into output array. */ 1.331 + 1.332 + wsptr = workspace; 1.333 + for (ctr = 0; ctr < 2; ctr++) { 1.334 + outptr = output_buf[ctr] + output_col; 1.335 + /* It's not clear whether a zero row test is worthwhile here ... */ 1.336 + 1.337 +#ifndef NO_ZERO_ROW_TEST 1.338 + if (wsptr[1] == 0 && wsptr[3] == 0 && wsptr[5] == 0 && wsptr[7] == 0) { 1.339 + /* AC terms all zero */ 1.340 + JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3) 1.341 + & RANGE_MASK]; 1.342 + 1.343 + outptr[0] = dcval; 1.344 + outptr[1] = dcval; 1.345 + 1.346 + wsptr += DCTSIZE; /* advance pointer to next row */ 1.347 + continue; 1.348 + } 1.349 +#endif 1.350 + 1.351 + /* Even part */ 1.352 + 1.353 + tmp10 = ((INT32) wsptr[0]) << (CONST_BITS+2); 1.354 + 1.355 + /* Odd part */ 1.356 + 1.357 + tmp0 = MULTIPLY((INT32) wsptr[7], - FIX_0_720959822) /* sqrt(2) * (c7-c5+c3-c1) */ 1.358 + + MULTIPLY((INT32) wsptr[5], FIX_0_850430095) /* sqrt(2) * (-c1+c3+c5+c7) */ 1.359 + + MULTIPLY((INT32) wsptr[3], - FIX_1_272758580) /* sqrt(2) * (-c1+c3-c5-c7) */ 1.360 + + MULTIPLY((INT32) wsptr[1], FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */ 1.361 + 1.362 + /* Final output stage */ 1.363 + 1.364 + outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp0, 1.365 + CONST_BITS+PASS1_BITS+3+2) 1.366 + & RANGE_MASK]; 1.367 + outptr[1] = range_limit[(int) DESCALE(tmp10 - tmp0, 1.368 + CONST_BITS+PASS1_BITS+3+2) 1.369 + & RANGE_MASK]; 1.370 + 1.371 + wsptr += DCTSIZE; /* advance pointer to next row */ 1.372 + } 1.373 +} 1.374 + 1.375 + 1.376 +/* 1.377 + * Perform dequantization and inverse DCT on one block of coefficients, 1.378 + * producing a reduced-size 1x1 output block. 1.379 + */ 1.380 + 1.381 +GLOBAL(void) 1.382 +jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1.383 + JCOEFPTR coef_block, 1.384 + JSAMPARRAY output_buf, JDIMENSION output_col) 1.385 +{ 1.386 + int dcval; 1.387 + ISLOW_MULT_TYPE * quantptr; 1.388 + JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1.389 + SHIFT_TEMPS 1.390 + 1.391 + /* We hardly need an inverse DCT routine for this: just take the 1.392 + * average pixel value, which is one-eighth of the DC coefficient. 1.393 + */ 1.394 + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1.395 + dcval = DEQUANTIZE(coef_block[0], quantptr[0]); 1.396 + dcval = (int) DESCALE((INT32) dcval, 3); 1.397 + 1.398 + output_buf[0][output_col] = range_limit[dcval & RANGE_MASK]; 1.399 +} 1.400 + 1.401 +#endif /* IDCT_SCALING_SUPPORTED */