#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef _WIN32

#define  int8_t  char
#define uint8_t  unsigned char
#define  int16_t short
#define uint16_t unsigned short
#define  int32_t int
#define uint32_t unsigned int
#define  int64_t __int64
#define uint64_t unsigned __int64

#else // !_WIN32

#ifdef __unix__

#include <inttypes.h>

#endif // __unix__
#endif // _WIN32

/*
 * Dummy S-Box used for testing the implementation.
 *
 * The actual S-Box requires an NDA: it is not disclosed to anyone other
 * than licensees of the 4C Entity.
 *
 * The table has 256 byte entries and is indexed with an 8-bit value by
 * the key schedules and by F().
 */
static const uint8_t sbox[256] = { 
	0xB6, 0xAA, 0xEB, 0xB3, 0x35, 0x5D, 0xEE, 0xB1, 0x72, 0x33, 0x05, 0x13, 0x6D, 0xC7, 0x6C, 0x27, 
	0x25, 0x54, 0xE9, 0x4C, 0xDE, 0xC3, 0x21, 0x39, 0xA9, 0xAB, 0xD6, 0xDF, 0xE8, 0x71, 0x94, 0xAE, 
	0x16, 0x44, 0x76, 0xCD, 0xB7, 0x78, 0x20, 0xF0, 0xC1, 0x9F, 0xCF, 0xAF, 0x0F, 0xCB, 0x59, 0x83, 
	0x3A, 0x5E, 0xB8, 0xB5, 0xF3, 0x47, 0x80, 0xC2, 0xF6, 0x14, 0xE6, 0x69, 0xFC, 0x17, 0xE0, 0xE5, 
	0x79, 0xF9, 0x12, 0xBF, 0x3C, 0xB4, 0x66, 0xAD, 0xF7, 0x65, 0x95, 0xF4, 0x4E, 0x02, 0xA0, 0x07, 
	0x4D, 0x2F, 0x0D, 0x7E, 0xE4, 0xEF, 0xA1, 0x8C, 0x6E, 0xD2, 0xFD, 0x19, 0x1C, 0x82, 0x42, 0xBB, 
	0x9A, 0x43, 0xC6, 0xE2, 0x1F, 0xF2, 0x75, 0x1A, 0x63, 0x45, 0xD1, 0x30, 0x81, 0x7F, 0x8E, 0x62, 
	0x3B, 0xA4, 0xFB, 0x1E, 0x5F, 0xBC, 0xB0, 0x40, 0x8B, 0x74, 0x38, 0x8A, 0xC4, 0x73, 0x9C, 0x09, 
	0xD4, 0xED, 0xD3, 0x5A, 0x60, 0x48, 0xC5, 0x9E, 0x01, 0xCC, 0x34, 0x1B, 0x58, 0x36, 0x23, 0x88, 
	0x7A, 0x90, 0x9B, 0x8F, 0xBD, 0x3F, 0xB9, 0x57, 0xA2, 0x3E, 0x04, 0xB2, 0x49, 0x37, 0x5C, 0x7D, 
	0x61, 0x4A, 0xA6, 0x67, 0xEC, 0x7C, 0x0E, 0x96, 0xDD, 0xE3, 0x2C, 0x56, 0x08, 0x0C, 0x8D, 0x2B, 
	0x6A, 0xFE, 0xEA, 0xA3, 0xCA, 0x3D, 0x91, 0xE7, 0xC9, 0xAC, 0x03, 0xD5, 0x89, 0x86, 0xDC, 0x10,
	0x55, 0x77, 0xC8, 0xD7, 0x97, 0x24, 0x46, 0x9D, 0x0A, 0x1D, 0x22, 0xD9, 0xFF, 0x5B, 0x52, 0xD8, 
	0x00, 0xFA, 0x53, 0x26, 0x29, 0x2E, 0x2A, 0x11, 0xC0, 0x6F, 0x4F, 0x7B, 0x28, 0x99, 0x41, 0x92, 
	0xDB, 0xF8, 0x50, 0xA8, 0x51, 0xA5, 0x4B, 0x93, 0x87, 0xDA, 0x06, 0x85, 0x2D, 0xBA, 0x0B, 0x98, 
	0x70, 0x6B, 0xBE, 0xF1, 0x18, 0xD0, 0x31, 0x68, 0x15, 0x84, 0x64, 0xE1, 0xCE, 0xA7, 0xF5, 0x32, 
};

/*
 * Public entry points.
 *
 * key: 7 bytes are read (key[0]..key[6]); the key is never written.
 * buf: processed in place.  C2_E/C2_D read and overwrite exactly 8 bytes;
 *      C2_ECBC/C2_DCBC walk buf in 8-byte steps while the running offset
 *      is below `size`, chaining a per-block working key.
 */
void C2_E(uint8_t *key, uint8_t *buf);
void C2_D(uint8_t *key, uint8_t *buf);
void C2_ECBC(uint8_t *key, uint8_t *buf, int size);
void C2_DCBC(uint8_t *key, uint8_t *buf, int size);

/* File-local helpers: 8-bit and 32-bit left rotations, and the round function. */
static uint8_t  lrot8(uint8_t code, int n);
static uint32_t lrot32(uint32_t code, int n);
static uint32_t F(uint32_t code, uint32_t key);

/* Number of cipher rounds; also the capacity of each sub-key array. */
#define ROUND 10

/*----------------------------------------------------------------*/
/*
 * C2_E - encrypt a single 8-byte block in place.
 *
 * key: 7 bytes are read.  key[0..2] form a 24-bit word and key[3..6] a
 *      32-bit word, both most-significant byte first.
 * buf: 8 bytes, read as two big-endian 32-bit halves (L, R) and
 *      overwritten with the result in the same layout.
 */
void C2_E(uint8_t *key, uint8_t *buf)
{
	uint32_t L,R,work;
	uint32_t ka,kb,kc,kd;
	uint32_t sk[ROUND];
	int i;

	/*
	 * Widen every byte to uint32_t before shifting.  A uint8_t is promoted
	 * to (signed) int, and shifting a value >= 0x80 left by 24 would move a
	 * bit into the sign position, which is undefined behaviour.
	 */
	L = ((uint32_t)buf[0] << 24) | ((uint32_t)buf[1] << 16) | ((uint32_t)buf[2] << 8) | (uint32_t)buf[3];
	R = ((uint32_t)buf[4] << 24) | ((uint32_t)buf[5] << 16) | ((uint32_t)buf[6] << 8) | (uint32_t)buf[7];

	ka = ((uint32_t)key[0] << 16) | ((uint32_t)key[1] << 8) | (uint32_t)key[2];
	kb = ((uint32_t)key[3] << 24) | ((uint32_t)key[4] << 16) | ((uint32_t)key[5] << 8) | (uint32_t)key[6];

	/*
	 * Key schedule: one sub-key per round.  After each sub-key the 56-bit
	 * value ka(24 bits):kb(32 bits) is rotated left by 17 bits; the bits of
	 * ka above bit 23 are discarded by the mask at the top of the loop.
	 */
	for(i=0;i<ROUND;i++){
		ka &= 0x00ffffff;
		sk[i] = kb + ((uint32_t)sbox[(ka&0xff)^i]<<4);
		kc = kb >> (32-17);
		kd = ka >> (24-17);
		ka = (ka << 17) | kc;
		kb = (kb << 17) | kd;
	}

	/* Rounds: add F(R, sub-key) into L (mod 2^32), then swap the halves. */
	for(i=0;i<ROUND;i++){
		L += F(R, sk[i]);
		work = L;
		L = R;
		R = work;
	}

	/* Undo the swap performed by the last round. */
	work = L;
	L = R;
	R = work;

	buf[0] = (uint8_t)((L >> 24) & 0xff);
	buf[1] = (uint8_t)((L >> 16) & 0xff);
	buf[2] = (uint8_t)((L >>  8) & 0xff);
	buf[3] = (uint8_t)((L      ) & 0xff);

	buf[4] = (uint8_t)((R >> 24) & 0xff);
	buf[5] = (uint8_t)((R >> 16) & 0xff);
	buf[6] = (uint8_t)((R >>  8) & 0xff);
	buf[7] = (uint8_t)((R      ) & 0xff);
}

/*
 * C2_D - decrypt a single 8-byte block in place.
 *
 * key: 7 bytes are read.  key[0..2] form a 24-bit word and key[3..6] a
 *      32-bit word, both most-significant byte first.
 * buf: 8 bytes, read as two big-endian 32-bit halves (L, R) and
 *      overwritten with the result in the same layout.
 *
 * The sub-keys are generated exactly as in C2_E and then applied in
 * reverse order, subtracting F() instead of adding it.
 */
void C2_D(uint8_t *key, uint8_t *buf)
{
	uint32_t L,R,work;
	uint32_t ka,kb,kc,kd;
	uint32_t sk[ROUND];
	int i;

	/*
	 * Widen every byte to uint32_t before shifting.  A uint8_t is promoted
	 * to (signed) int, and shifting a value >= 0x80 left by 24 would move a
	 * bit into the sign position, which is undefined behaviour.
	 */
	L = ((uint32_t)buf[0] << 24) | ((uint32_t)buf[1] << 16) | ((uint32_t)buf[2] << 8) | (uint32_t)buf[3];
	R = ((uint32_t)buf[4] << 24) | ((uint32_t)buf[5] << 16) | ((uint32_t)buf[6] << 8) | (uint32_t)buf[7];

	ka = ((uint32_t)key[0] << 16) | ((uint32_t)key[1] << 8) | (uint32_t)key[2];
	kb = ((uint32_t)key[3] << 24) | ((uint32_t)key[4] << 16) | ((uint32_t)key[5] << 8) | (uint32_t)key[6];

	/*
	 * Key schedule: one sub-key per round.  After each sub-key the 56-bit
	 * value ka(24 bits):kb(32 bits) is rotated left by 17 bits; the bits of
	 * ka above bit 23 are discarded by the mask at the top of the loop.
	 */
	for(i=0;i<ROUND;i++){
		ka &= 0x00ffffff;
		sk[i] = kb + ((uint32_t)sbox[(ka&0xff)^i]<<4);
		kc = kb >> (32-17);
		kd = ka >> (24-17);
		ka = (ka << 17) | kc;
		kb = (kb << 17) | kd;
	}

	/* Rounds in reverse sub-key order: subtract F(R, sub-key), then swap. */
	for(i=ROUND-1;i>=0;i--){
		L -= F(R, sk[i]);
		work = L;
		L = R;
		R = work;
	}

	/* Undo the swap performed by the last round. */
	work = L;
	L = R;
	R = work;

	buf[0] = (uint8_t)((L >> 24) & 0xff);
	buf[1] = (uint8_t)((L >> 16) & 0xff);
	buf[2] = (uint8_t)((L >>  8) & 0xff);
	buf[3] = (uint8_t)((L      ) & 0xff);

	buf[4] = (uint8_t)((R >> 24) & 0xff);
	buf[5] = (uint8_t)((R >> 16) & 0xff);
	buf[6] = (uint8_t)((R >>  8) & 0xff);
	buf[7] = (uint8_t)((R      ) & 0xff);
}

/*
 * C2_ECBC - encrypt a buffer in place, 8 bytes at a time, with a working
 * key that is re-derived from the intermediate state of each block.
 *
 * key:  7 bytes are read; the caller's key is never modified.
 * buf:  data to encrypt in place.
 * size: number of bytes to process.
 *
 * NOTE: the loop runs while the block offset is below `size` and always
 * touches a full 8 bytes, so a `size` that is not a multiple of 8 makes the
 * last iteration read and write past buf + size.  Callers should pass a
 * multiple of 8.
 *
 * The first block uses ROUND sub-keys; every later block generates only 2
 * sub-keys and alternates between them (sk[j % 2]) over the ROUND rounds.
 */
void C2_ECBC(uint8_t *key, uint8_t *buf, int size)
{
	uint32_t L,R,work;
	uint32_t ka,kb,kc,kd;
	uint32_t sk[ROUND];

	uint8_t wk[7];

	int i,j;
	int key_round = ROUND;

	/* The first block is keyed with the caller's key unchanged. */
	memcpy(wk, key, 7);

	for(i=0;i<size;i+=8){
		/*
		 * Widen every byte to uint32_t before shifting: a uint8_t is
		 * promoted to int, and shifting a value >= 0x80 left by 24 is
		 * undefined behaviour.
		 */
		L = ((uint32_t)buf[0] << 24) | ((uint32_t)buf[1] << 16) | ((uint32_t)buf[2] << 8) | (uint32_t)buf[3];
		R = ((uint32_t)buf[4] << 24) | ((uint32_t)buf[5] << 16) | ((uint32_t)buf[6] << 8) | (uint32_t)buf[7];

		ka = ((uint32_t)wk[0] << 16) | ((uint32_t)wk[1] << 8) | (uint32_t)wk[2];
		kb = ((uint32_t)wk[3] << 24) | ((uint32_t)wk[4] << 16) | ((uint32_t)wk[5] << 8) | (uint32_t)wk[6];

		/* Key schedule: 56-bit ka:kb rotated left by 17 after each sub-key. */
		for(j=0;j<key_round;j++){
			ka &= 0x00ffffff;
			sk[j] = kb + ((uint32_t)sbox[(ka&0xff)^j]<<4);
			kc = kb >> (32-17);
			kd = ka >> (24-17);
			ka = (ka << 17) | kc;
			kb = (kb << 17) | kd;
		}

		for(j=0;j<ROUND;j++){
			L += F(R, sk[j%key_round]);
			if(j == 4){
				/*
				 * Working key for the NEXT block: the caller's key XORed
				 * with the low 24 bits of R and all 32 bits of L, taken
				 * here before this round's swap.
				 */
				wk[0] = (uint8_t)(key[0] ^ (R>>16));
				wk[1] = (uint8_t)(key[1] ^ (R>> 8));
				wk[2] = (uint8_t)(key[2] ^ (R    ));
				wk[3] = (uint8_t)(key[3] ^ (L>>24));
				wk[4] = (uint8_t)(key[4] ^ (L>>16));
				wk[5] = (uint8_t)(key[5] ^ (L>> 8));
				wk[6] = (uint8_t)(key[6] ^ (L    ));
			}
			work = L;
			L = R;
			R = work;
		}

		/* Undo the swap performed by the last round. */
		work = L;
		L = R;
		R = work;

		buf[0] = (uint8_t)((L >> 24) & 0xff);
		buf[1] = (uint8_t)((L >> 16) & 0xff);
		buf[2] = (uint8_t)((L >>  8) & 0xff);
		buf[3] = (uint8_t)((L      ) & 0xff);

		buf[4] = (uint8_t)((R >> 24) & 0xff);
		buf[5] = (uint8_t)((R >> 16) & 0xff);
		buf[6] = (uint8_t)((R >>  8) & 0xff);
		buf[7] = (uint8_t)((R      ) & 0xff);

		buf += 8;

		/* Every block after the first uses only two sub-keys. */
		key_round = 2;
	}
}

/*
 * C2_DCBC - decrypt a buffer in place, 8 bytes at a time, with a working
 * key that is re-derived from the intermediate state of each block.
 *
 * key:  7 bytes are read; the caller's key is never modified.
 * buf:  data to decrypt in place.
 * size: number of bytes to process.
 *
 * NOTE: the loop runs while the block offset is below `size` and always
 * touches a full 8 bytes, so a `size` that is not a multiple of 8 makes the
 * last iteration read and write past buf + size.  Callers should pass a
 * multiple of 8.
 *
 * The first block uses ROUND sub-keys; every later block generates only 2
 * sub-keys and alternates between them (sk[j % 2]) over the ROUND rounds.
 */
void C2_DCBC(uint8_t *key, uint8_t *buf, int size)
{
	uint32_t L,R,work;
	uint32_t ka,kb,kc,kd;
	uint32_t sk[ROUND];

	uint8_t wk[7];

	int i,j;
	int key_round = ROUND;

	/* The first block is keyed with the caller's key unchanged. */
	memcpy(wk, key, 7);

	for(i=0;i<size;i+=8){
		/*
		 * Widen every byte to uint32_t before shifting: a uint8_t is
		 * promoted to int, and shifting a value >= 0x80 left by 24 is
		 * undefined behaviour.
		 */
		L = ((uint32_t)buf[0] << 24) | ((uint32_t)buf[1] << 16) | ((uint32_t)buf[2] << 8) | (uint32_t)buf[3];
		R = ((uint32_t)buf[4] << 24) | ((uint32_t)buf[5] << 16) | ((uint32_t)buf[6] << 8) | (uint32_t)buf[7];

		ka = ((uint32_t)wk[0] << 16) | ((uint32_t)wk[1] << 8) | (uint32_t)wk[2];
		kb = ((uint32_t)wk[3] << 24) | ((uint32_t)wk[4] << 16) | ((uint32_t)wk[5] << 8) | (uint32_t)wk[6];

		/* Key schedule: 56-bit ka:kb rotated left by 17 after each sub-key. */
		for(j=0;j<key_round;j++){
			ka &= 0x00ffffff;
			sk[j] = kb + ((uint32_t)sbox[(ka&0xff)^j]<<4);
			kc = kb >> (32-17);
			kd = ka >> (24-17);
			ka = (ka << 17) | kc;
			kb = (kb << 17) | kd;
		}

		for(j=ROUND-1;j>=0;j--){
			L -= F(R, sk[j%key_round]);
			work = L;
			L = R;
			R = work;
			if(j == 5){
				/*
				 * Working key for the NEXT block: the caller's key XORed
				 * with the low 24 bits of R and all 32 bits of L, taken
				 * here after round 5 has been undone and swapped.
				 */
				wk[0] = (uint8_t)(key[0] ^ (R>>16));
				wk[1] = (uint8_t)(key[1] ^ (R>> 8));
				wk[2] = (uint8_t)(key[2] ^ (R    ));
				wk[3] = (uint8_t)(key[3] ^ (L>>24));
				wk[4] = (uint8_t)(key[4] ^ (L>>16));
				wk[5] = (uint8_t)(key[5] ^ (L>> 8));
				wk[6] = (uint8_t)(key[6] ^ (L    ));
			}
		}

		/* Undo the swap performed by the last round. */
		work = L;
		L = R;
		R = work;

		buf[0] = (uint8_t)((L >> 24) & 0xff);
		buf[1] = (uint8_t)((L >> 16) & 0xff);
		buf[2] = (uint8_t)((L >>  8) & 0xff);
		buf[3] = (uint8_t)((L      ) & 0xff);

		buf[4] = (uint8_t)((R >> 24) & 0xff);
		buf[5] = (uint8_t)((R >> 16) & 0xff);
		buf[6] = (uint8_t)((R >>  8) & 0xff);
		buf[7] = (uint8_t)((R      ) & 0xff);

		buf += 8;

		/* Every block after the first uses only two sub-keys. */
		key_round = 2;
	}
}

/*
 * Rotate the 8-bit value `code` left by `n` bit positions.
 * `n` must be in the range 0..8.
 */
static uint8_t  lrot8(uint8_t code, int n)
{
	unsigned int wide = code;
	unsigned int upper = wide << n;        /* bits moving towards the top */
	unsigned int lower = wide >> (8 - n);  /* bits wrapping to the bottom */

	/* Truncating to 8 bits discards whatever was shifted above bit 7. */
	return (uint8_t)(upper | lower);
}

/*
 * Rotate the 32-bit value `code` left by `n` bit positions.
 *
 * Both shift counts are reduced modulo 32, so every `n` is well defined:
 * n == 0 (or any multiple of 32) returns `code` unchanged instead of
 * shifting by the full width of the type, which is undefined behaviour.
 */
static uint32_t lrot32(uint32_t code, int n)
{
	unsigned int left = (unsigned int)n & 31u;
	unsigned int right = (32u - left) & 31u;

	return (code << left) | (code >> right);
}

/*
 * Round function.
 *
 * Adds `key` to `code` (mod 2^32), substitutes the low byte through sbox,
 * XORs each of the three upper bytes with a rotated, constant-masked copy
 * of the substituted byte, and finally XORs the reassembled word with its
 * own left rotations by 9 and by 22 bits.
 */
static uint32_t F(uint32_t code, uint32_t key)
{
	uint32_t work;
	uint8_t u;
	uint8_t v[4];

	work = code+key;

	/* v[3] is the most significant byte, v[0] the substituted low byte. */
	v[3] = (uint8_t)((work>>24)&0xff);
	v[2] = (uint8_t)((work>>16)&0xff);
	v[1] = (uint8_t)((work>> 8)&0xff);
	v[0] = sbox[work&0xff];
	u = v[0] ^ 0x65;
	v[1] ^= lrot8(u, 1);
	u = v[0] ^ 0x2b;
	v[2] ^= lrot8(u, 5);
	u = v[0] ^ 0xc9;
	v[3] ^= lrot8(u, 2);

	/*
	 * Widen to uint32_t before shifting: v[3] would otherwise be promoted
	 * to int, and shifting a value >= 0x80 left by 24 is undefined.
	 */
	work = ((uint32_t)v[3]<<24) | ((uint32_t)v[2]<<16) | ((uint32_t)v[1]<<8) | (uint32_t)v[0];
	work ^= lrot32(work, 9) ^ lrot32(work, 22);

	return work;
}

/*
 * Write `len` bytes of `data` to stderr as two-digit hex values separated
 * by ':' and terminated by a newline.  `len` must be at least 1.
 */
static void dump_hex(const uint8_t *data, size_t len)
{
	size_t i;

	fprintf(stderr, "%02x", data[0]);
	for(i=1;i<len;i++){
		fprintf(stderr, ":%02x", data[i]);
	}
	fprintf(stderr, "\n");
}

/*
 * Self-test: encrypts a fixed 24-byte plaintext with C2_ECBC and compares
 * it against the expected ciphertext, then decrypts that ciphertext with
 * C2_DCBC and compares it against the plaintext.  Results are reported on
 * stderr; on a mismatch the bytes actually produced are dumped.
 *
 * Returns EXIT_SUCCESS only when both checks pass, EXIT_FAILURE otherwise,
 * so that a failing self-test is visible to scripts and build systems.
 */
int main(int argc, char **argv)
{
	int status = EXIT_SUCCESS;

	uint8_t key[7] = {
		0x7c, 0xb3, 0xc4, 0xdb, 0x09, 0x47, 0x13,
	};

	uint8_t p[24] = {
		0xa2, 0x46, 0x32, 0xd8, 0x24, 0x32, 0x08, 0x44,
		0x7d, 0x81, 0x11, 0xdf, 0x8c, 0xe2, 0x41, 0x72,
		0x76, 0xbe, 0x42, 0xd7, 0x0d, 0xb1, 0x44, 0x18,
	};

	uint8_t c[24] = {
		0x50, 0xfc, 0x09, 0xd1, 0x69, 0x1c, 0x51, 0x02,
		0x54, 0x1d, 0x32, 0x2f, 0x68, 0xe7, 0xfd, 0x79,
		0x91, 0xa8, 0x0c, 0x3d, 0x9d, 0x9f, 0x31, 0x0d,
	};

	uint8_t b[24];

	/* Command-line arguments are not used. */
	(void)argc;
	(void)argv;

	memcpy(b, p, sizeof(b));
	C2_ECBC(key, b, (int)sizeof(b));
	if(memcmp(b, c, sizeof(b)) != 0){
		fprintf(stderr, "failed on C2_ECBC()\n");
		dump_hex(b, sizeof(b));
		status = EXIT_FAILURE;
	}else{
		fprintf(stderr, "passed on C2_ECBC()\n");
	}

	memcpy(b, c, sizeof(b));
	C2_DCBC(key, b, (int)sizeof(b));
	if(memcmp(b, p, sizeof(b)) != 0){
		fprintf(stderr, "failed on C2_DCBC()\n");
		dump_hex(b, sizeof(b));
		status = EXIT_FAILURE;
	}else{
		fprintf(stderr, "passed on C2_DCBC()\n");
	}

	return status;
}
