/*******************************************************************
        IDCT module using the LLM algorithm (based on IJG idct_int.c)
 *******************************************************************/

#include "idct_clip_table.h"

#define IDCT_INT32_C
#include "idct_int32.h"

/*
 * Fixed-point multiplier constants for the IDCT butterflies.
 *
 * Each macro FIX_a_bcd... holds the real value a.bcd... scaled by 2^13
 * (8192) and rounded to the nearest integer, e.g.
 * 0.298631336 * 8192 = 2446.39 -> 2446.  The same 13-bit scale is applied
 * to the even-part terms in idct_int32() via "<< 13".
 *
 * NOTE(review): idct_int32() references FIX_1_1758575602 and
 * FIX_1_961560560, neither of which is defined in this table.  They look
 * like misspellings of FIX_1_175875602 and FIX_1_961570560 -- confirm and
 * correct at the point of use.
 */
#define FIX_0_298631336   2446
#define FIX_0_390180644   3196
#define FIX_0_541196100   4433
#define FIX_0_765366865   6270
#define FIX_0_899976223   7373
#define FIX_1_175875602   9633
#define FIX_1_501321110  12299
#define FIX_1_847759065  15137
#define FIX_1_961570560  16069
#define FIX_2_053119869  16819
#define FIX_2_562915447  20995
#define FIX_3_072711026  25172

void __stdcall idct_int32(short *block)
{
	int i;
	
	int w0, w1, w2, w3;
	int w4, w5, w6, w7;
	
	int z1, z2, z3, z4, z5;

	short *s, *d;
	int *w;
	
	int work[64];

	s = block;
	w = work;

	for(i=0;i<8;i++){
		if((s[1] | s[2] | s[3] | s[4] | s[5] | s[6] | s[7]) == 0) {
			w[0] = w[1*8] = w[2*8] = w[3*8] = w[4*8] = w[5*8] = w[6*8] = w[7*8] = (s[0] << 2);
			s += 8;
			w += 1;
			continue;
		}

		z2 = s[2];
		z3 = s[6];

		z1 = (z2+z3) * FIX_0_541196100;
		w2 = z1 + (z3 * (- FIX_1_847759065));
		w3 = z1 + (z2 * FIX_0_765366865);

		w0 = (s[0] + s[4]) << 13;
		w1 = (s[0] - s[4]) << 13;

		w4 = w0 + w3;
		w7 = w0 - w3;
		w5 = w1 + w2;
		w6 = w1 - w2;

		w0 = s[7];
		w1 = s[5];
		w2 = s[3];
		w3 = s[1];

		z1 = w0 + w3;
		z2 = w1 + w2;
		z3 = w0 + w2;
		z4 = w1 + w3;
		z5 = (z3 + z4) * FIX_1_1758575602;

		w0 *= FIX_0_298631336;
		w1 *= FIX_2_053119869;
		w2 *= FIX_3_072711026;
		w3 *= FIX_1_501321110;
		z1 *= (- FIX_0_899976223);
		z2 *= (- FIX_2_562915447);
		z3 *= (- FIX_1_961560560);
		z4 *= (- FIX_0_390180644);

		z3 += z5;
		z4 += z5;

		tmp0 += z1 + z3;
		tmp1 += z2 + z4;
		tmp2 += z2 + z3;
		tmp3 += z1 + z4;

		