#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#define FIXED_POINT_SCALE 256
#define CORDIC_NTAB 64
#define N_MAX 16
// === Function prototypes for printing helpers (updated) ===
void print_matrix(const char* title, float* mat, int n);
void print_matrix_double(const char* title, double* mat, int n);
void print_matrix_int64(const char* title, int64_t* mat, int n);
void print_zigzag_int64(const char* title, int64_t* arr, int n);
// === Quantization Matrix Selector ===
void select_quant_matrix(int n, int quant_matrix[N_MAX][N_MAX]) {
int i, j;
memset(quant_matrix
, 0, sizeof(int) * N_MAX
* N_MAX
);
if (n == 4) {
int tmp[4][4] = {
{16, 12, 34, 61},
{14, 20, 57, 58},
{22, 52, 95, 87},
{72, 96, 104, 99}
};
for (i = 0; i < 4; i++)
for (j = 0; j < 4; j++)
quant_matrix[i][j] = tmp[i][j];
} else if (n == 6) {
int tmp[6][6] = {
{16, 11, 15, 27, 46, 61},
{13, 13, 19, 35, 61, 55},
{14, 18, 26, 54, 79, 61},
{19, 30, 54, 77, 106, 80},
{37, 57, 75, 97, 116, 97},
{72, 93, 97, 109, 102, 99}
};
for (i = 0; i < 6; i++)
for (j = 0; j < 6; j++)
quant_matrix[i][j] = tmp[i][j];
} else if (n == 8) {
int tmp[8][8] = {
{16, 11, 10, 16, 24, 40, 51, 61},
{12, 12, 14, 19, 26, 58, 60, 55},
{14, 13, 16, 24, 40, 57, 69, 56},
{14, 17, 22, 29, 51, 87, 80, 62},
{18, 22, 37, 56, 68, 109, 103, 77},
{24, 35, 55, 64, 81, 104, 113, 92},
{49, 64, 78, 87, 103, 121, 120, 101},
{72, 92, 95, 98, 112, 100, 103, 99}
};
for (i = 0; i < 8; i++)
for (j = 0; j < 8; j++)
quant_matrix[i][j] = tmp[i][j];
} else if (n == 10) {
int tmp[10][10] = {
{16, 12, 10, 12, 17, 23, 34, 45, 53, 61},
{13, 12, 12, 15, 19, 25, 42, 55, 58, 56},
{13, 13, 14, 17, 23, 32, 48, 61, 63, 56},
{14, 14, 16, 20, 27, 41, 57, 69, 69, 58},
{14, 17, 21, 26, 33, 50, 75, 86, 78, 64},
{18, 20, 28, 40, 53, 64, 91, 104, 94, 75},
{22, 28, 39, 52, 63, 75, 95, 107, 104, 87},
{33, 43, 55, 67, 75, 88, 104, 113, 111, 96},
{53, 65, 76, 84, 91, 103, 112, 116, 112, 101},
{72, 87, 94, 96, 99, 110, 104, 101, 102, 99}
};
for (i = 0; i < 10; i++)
for (j = 0; j < 10; j++)
quant_matrix[i][j] = tmp[i][j];
} else if (n == 12) {
int tmp[12][12] = {
{16, 13, 11, 10, 13, 17, 22, 30, 41, 48, 54, 61},
{13, 12, 12, 12, 15, 19, 24, 35, 51, 55, 57, 57},
{13, 12, 13, 14, 17, 22, 27, 40, 58, 61, 60, 55},
{14, 13, 14, 16, 20, 26, 35, 46, 58, 65, 63, 56},
{14, 15, 16, 19, 23, 29, 41, 56, 72, 74, 69, 59},
{15, 17, 19, 24, 29, 36, 49, 68, 90, 86, 76, 64},
{17, 20, 24, 32, 42, 52, 62, 80, 104, 100, 89, 74},
{21, 25, 31, 42, 52, 62, 71, 87, 107, 107, 98, 83},
{26, 32, 42, 55, 62, 69, 79, 92, 106, 111, 106, 93},
{40, 49, 58, 69, 76, 83, 93, 105, 116, 118, 111, 98},
{56, 66, 76, 83, 88, 93, 103, 109, 113, 113, 109, 100},
{72, 84, 93, 95, 97, 100, 109, 106, 100, 102, 102, 99}
};
for (i = 0; i < 12; i++)
for (j = 0; j < 12; j++)
quant_matrix[i][j] = tmp[i][j];
} else if (n == 14) {
int tmp[14][14] = {
{16, 13, 11, 10, 11, 14, 18, 22, 28, 37, 44, 50, 55, 61},
{14, 12, 12, 12, 13, 16, 19, 23, 31, 44, 51, 55, 57, 58},
{12, 12, 12, 13, 15, 18, 21, 25, 34, 51, 59, 60, 58, 55},
{13, 13, 13, 14, 16, 20, 24, 31, 40, 53, 60, 65, 61, 56},
{14, 14, 14, 15, 18, 22, 28, 37, 47, 57, 64, 70, 64, 57},
{14, 15, 16, 18, 21, 25, 31, 42, 55, 71, 76, 76, 68, 60},
{15, 16, 18, 22, 26, 31, 38, 49, 64, 85, 89, 85, 75, 65},
{17, 19, 21, 28, 35, 43, 51, 60, 74, 96, 101, 98, 85, 73},
{20, 23, 26, 35, 44, 53, 61, 68, 81, 101, 107, 106, 94, 81},
{23, 28, 34, 43, 53, 59, 66, 75, 86, 100, 107, 111, 101, 90},
{32, 38, 45, 55, 64, 69, 76, 85, 95, 107, 112, 115, 106, 95},
{46, 54, 62, 70, 77, 82, 88, 97, 106, 117, 120, 119, 110, 100},
{59, 67, 76, 82, 86, 90, 95, 103, 108, 110, 111, 112, 106, 100},
{72, 82, 92, 94, 95, 97, 101, 109, 108, 102, 101, 103, 101, 99}
};
for (i = 0; i < 14; i++)
for (j = 0; j < 14; j++)
quant_matrix[i][j] = tmp[i][j];
} else if (n == 16) {
int tmp[16][16] = {
{16, 13, 11, 11, 10, 12, 15, 18, 22, 27, 34, 41, 46, 52, 56, 61},
{14, 13, 12, 12, 12, 13, 16, 19, 23, 28, 38, 49, 52, 55, 57, 58},
{12, 12, 12, 13, 13, 15, 18, 20, 24, 30, 44, 57, 58, 59, 57, 55},
{13, 13, 12, 13, 14, 17, 19, 23, 28, 35, 47, 58, 61, 63, 59, 55},
{14, 13, 13, 14, 15, 18, 22, 26, 33, 41, 50, 58, 63, 67, 61, 56},
{14, 14, 14, 16, 17, 20, 24, 29, 38, 47, 57, 67, 70, 71, 64, 58},
{14, 15, 16, 18, 20, 23, 26, 32, 42, 54, 68, 80, 79, 76, 68, 61},
{15, 16, 18, 21, 24, 28, 32, 39, 49, 61, 78, 91, 88, 84, 74, 66},
{17, 18, 20, 25, 30, 37, 44, 51, 58, 69, 87, 102, 99, 94, 83, 73},
{19, 21, 24, 30, 37, 45, 54, 61, 67, 77, 94, 108, 106, 103, 91, 80},
{22, 25, 29, 36, 45, 52, 58, 65, 72, 82, 95, 106, 108, 108, 97, 87},
{26, 31, 37, 45, 54, 60, 65, 71, 79, 88, 98, 107, 111, 112, 102, 93},
{37, 43, 49, 57, 65, 71, 75, 81, 89, 97, 107, 114, 116, 116, 106, 97},
{50, 57, 64, 71, 77, 82, 86, 92, 99, 107, 114, 119, 119, 117, 109, 101},
{60, 68, 76, 81, 85, 89, 91, 96, 103, 108, 109, 109, 110, 110, 105, 100},
{72, 81, 91, 93, 95, 96, 97, 102, 108, 109, 104, 100, 102, 103, 101, 99}
};
for (i = 0; i < 16; i++)
for (j = 0; j < 16; j++)
quant_matrix[i][j] = tmp[i][j];
}
}
int rotations(int bpp){
if(bpp>3 & bpp<9){
return 16;
}
else if(bpp>9 & bpp<17){
return 32;
}
else{
return 55;
}
}
static const __uint128_t atanh_fixed55[56] = {
0,
231808622658467921920, 136844620538003570688, 72304939235532316672, 36703116139066482688, 8422781014094860288,
9220371395095598080, 4611310773424436736, 2305796098435487488, 1152915640598518656, 576460019297349376,
288230284525795200, 144115176622611392, 72057592606272224, 36028796840007000, 18014398487112362,
9007199251944789, 4503599627020970, 2251799813641557, 1125899906837163, 562949953420629,
281474976710571, 140737488355317, 70368744177663, 35184372088832, 17592186044416,
8796093022208, 4398046511104, 2199023255552, 1099511627776, 549755813888,
274877906944, 137438953472, 68719476736, 34359738368, 17179869184,
8589934592, 4294967296, 2147483648, 1073741824, 536870912,
268435456, 134217728, 67108864, 33554432, 16777216,
8388608, 4194304, 2097152, 1048576, 524288,
262144, 131072, 65536, 32768, 16384
};
const uint64_t k = 11201839480116572000ULL;
//uint64_t K = k >> (63 - TP_WR_WIDTH - 12);
int clog2_n2(int n) {
int v = n*n, count = 0;
while (v > 1) { v >>= 1; ++count; }
return count + 1;
}
void print_matrix(const char* title, float* mat, int n) {
for (int i = 0; i < n; i++) {
for (int j
= 0; j
< n
; j
++) printf("%8.4f ", mat
[i
*n
+ j
]); }
}
void print_matrix_double(const char* title, double* mat, int n) {
printf("\n%s (double):\n", title
); for (int i = 0; i < n; i++) {
for (int j
= 0; j
< n
; j
++) printf("%12.4f ", mat
[i
*n
+ j
]); }
}
void print_matrix_int64(const char* title, int64_t* mat, int n) {
printf("\n%s (int64_t):\n", title
); for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++)
printf("%12lld ", (long long)mat
[i
*n
+ j
]); }
}
void print_zigzag_int64(const char* title, int64_t* arr, int n) {
int total = n * n;
printf("\n%s (length %d):\n", title
, total
); for (int i = 0; i < total; ++i) {
printf("[%3d]=%6lld ", i
, (long long)arr
[i
]); if ((i
+ 1) % 8 == 0) printf("\n"); }
if (total
% 8 != 0) printf("\n"); }
void cordic_rotation(uint64_t theta, int rot, int64_t* c, int64_t* s, int bpp,int tc_mode,int idct_mode) {
uint64_t atanh_fixed[56];
const uint64_t m_pi = 14488038916154245000LL;
int TP_WR_WIDTH = bpp/2+bpp+3+(1-tc_mode)*(1-idct_mode);
uint64_t K = k >> (64 - TP_WR_WIDTH - 12);
for (int i = 1; i <= 56; i++) {
atanh_fixed[i] = (uint64_t)(atanh_fixed55[i] >> (63 - TP_WR_WIDTH - 12));
}
while (theta < -m_pi)
theta += (m_pi << 1);
while (theta > m_pi)
theta -= (m_pi << 1);
int sign = 1;
if(theta < -(2*m_pi)){
theta += m_pi;
sign = -1;
}
else if(theta > (2*m_pi)){
theta -= m_pi;
sign = -1;
}
uint64_t x = K, y = 0, z = theta;
for (int i = 0; i < rot; ++i){
int64_t di = (z >= 0) ? 1 : -1;
int64_t x_new = x - di * (y >> i);
int64_t y_new = y + di * (x >> i);
int64_t z_new = z - di * atanh_fixed[i];
x = x_new;
y = y_new;
z = z_new;
}
*c = sign * x;
*s = sign * y;
}
void cordic_dct_1d(const uint64_t* in, uint64_t* out, int n, int rot, int bpp, int tc_mode, int idct_mode) {
static int64_t alpha[N_MAX];
const uint64_t m_pi = 14488038916154245000LL;
static int inited = 0;
if (!inited){
for (int k = 1; k < n; ++k)
inited = 1;
}
for (int k = 0; k < n; ++k) {
int64_t acc = 0;
for(int m = 0; m < n; ++m) {
int64_t angle = (m_pi * (2LL * m + 1LL) * k) / (2LL * n);
int64_t c, s;
cordic_rotation(angle, rot, &c, &s, bpp, tc_mode, idct_mode);
acc += in[m] * c;
}
out[k] = alpha[k] * acc;
}
}
// quantize from int64_t (signed) to int64_t (signed)
void quantize_fixed(int64_t* in, int64_t* out, int n,int rt_mode, int quant_matrix[N_MAX][N_MAX]) {
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++) {
int q = quant_matrix[i][j];
if (q == 0) q = 1; // safety
if(rt_mode==0) {
double v = (double)in[i*n + j] / (double)q;
out[i*n + j] = (int64_t)llround(v);
}
else{
out[i*n+j] = in[i*n+j]/q;
}
}
}
void zigzag_traversal_int64(int64_t* mat, int64_t* out, int n) {
int i = 0, j = 0, index = 0, up = 1;
for (int k = 0; k < n * n; k++) {
out[index++] = mat[i*n + j];
if (up) {
if (j == n - 1) { i++; up = 0; }
else if (i == 0) { j++; up = 0; }
else { i--; j++; }
} else {
if (i == n - 1) { j++; up = 1; }
else if (j == 0) { i++; up = 1; }
else { i++; j--; }
}
}
}
static inline int64_t limit_bits_signed(int64_t x, int bits) {
int64_t min, max;
if (bits >= 63) return x;
min = -(1LL << (bits - 1));
max = (1LL << (bits - 1)) - 1;
if (x < min) x = min;
else if (x > max) x = max;
return x;
}
static inline int64_t inverse_limit_bits_signed(int64_t x, int bits)
{
if (bits >= 63) return x; // No limit needed
int64_t max = (1LL << bits) - 1; // Max unsigned value with N bits
if (x < 0)
return 0; // Negative → clamp to 0
if (x > max)
return max; // Overflow → clamp to max
return x; // Within range
}
// === MAIN FUNCTION ===
int main() {
int n = 8, bpp = 8;
printf("Set matrix size n (even, 4–16, default 8): "); int tmpn;
if (scanf("%d", &tmpn
) == 1 && tmpn
>= 4 && tmpn
<= 16 && tmpn
% 2 == 0) n = tmpn;
printf("Set bits per pixel bpp (4–32, default 8): "); int tmpb;
if (scanf("%d", &tmpb
) == 1 && tmpb
>= 4 && tmpb
<= 32) bpp = tmpb;
int tc_mode;
printf("enter the tc_mode (0=normal signed, 1=normal signed – currently same): "); scanf("%d", &tc_mode
); // kept for interface; not changing behavior
int idct_mode;
printf("enter the idct_mode (0=DCT, 1=IDCT): ");
int rt_mode;
printf("enter the rt_mode (0 :rounded up 1:truncate): ");
int addr_bits = clog2_n2(n);
printf("For n=%d, RAM address width: %d bits\n", n
, addr_bits
);
// Select quantization matrix
int quant_matrix[N_MAX][N_MAX], rot;
select_quant_matrix(n, quant_matrix);
rot = rotations(bpp);
// Effective input width (for range limit only)
// int input_bits = (idct_mode == 0) ? bpp : (n/2 + bpp);
// unsigned long long max_val = (input_bits >= 63) ? ((1ULL << 62) - 1ULL)
// : ((1ULL << (input_bits - 1)) - 1ULL);
// Buffers
int64_t dct_mem[N_MAX*N_MAX]; // generic float buffer (spatial or freq)
int64_t tp_mem[N_MAX*N_MAX]; // after row transform
int64_t tp_transposed[N_MAX*N_MAX]; // transposed intermediate
int64_t fixed[N_MAX*N_MAX]; // integer-rounded values
int64_t quantized[N_MAX*N_MAX]; // quantized coefficients
int64_t zigzag[N_MAX*N_MAX]; // zigzag order
int64_t zigzag_matrix[N_MAX*N_MAX]; // zigzag array reshaped as matrix
int64_t idct_matrix[N_MAX*N_MAX]; // de-zigzagged quantized
int64_t idct_mem[N_MAX*N_MAX]; // dequantized freq-domain (int64)
if (idct_mode == 0) {
int done=0;
// ==========================
// Forward 2D DCT + Quant + Zigzag
// ==========================
int input_bits = (idct_mode == 0) ? bpp : (n/2 + bpp);
unsigned long long max_val = (input_bits >= 64) ? ~0ULL : ((1ULL << input_bits) - 1ULL);
printf("Enter %d input values (decimal, 0–%llu):\n", n
* n
, max_val
); for (int addr = 0; addr < n * n; ++addr) {
unsigned long long uval;
if (scanf("%llu", &uval
) != 1 || uval
> max_val
) { fprintf(stderr
, "Input out of range (0–%llu)\n", max_val
); return 1;
}
// Interpret as unsigned or signed (two's complement) depending on tc_mode
if (tc_mode == 1) {
dct_mem[addr] = (~uval+1) & ((((uint64_t)1 << input_bits) - 1));;
printf("WR dct_mem[%d] = %.0f\n", addr
, dct_mem
[addr
]); done=1;
//int64_t sval = tc_to_signed(uval, input_bits);
// dct_mem[addr] = twos_complement_to_signed(uval, input_bits);
// printf("WR dct_mem[%d] = %lld (from 0x%llX, %d-bit two's complement)\n",
// addr, (long long)sval, uval, input_bits);
} else {
dct_mem[addr] = (int64_t)uval;
printf("WR dct_mem[%d] = %.0f\n", addr
, dct_mem
[addr
]); done=1;
}
}
// Step 1: Row-wise DCT
for (int i = 0; i < n; i++) {
int64_t in_row[N_MAX], out_row[N_MAX];
for (int j = 0; j < n; j++)
in_row[j] = dct_mem[i*n + j];
cordic_dct_1d(in_row, out_row, n, rot, bpp, tc_mode, idct_mode);
for (int j = 0; j < n; j++)
tp_mem[i*n + j] = (int64_t)out_row[j];
}
print_matrix_int64("1D DCT (Rows) Output", tp_mem, n);
// Step 2: Transpose after rounding
//float_to_int64(tp_mem, fixed, n, rt_mode);
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
tp_transposed[i*n + j] = (int64_t)fixed[j*n + i];
//print_matrix_double("Transpose after 1D DCT (double)", tp_transposed, n);
// Step 3: Column-wise DCT on transposed
for (int i = 0; i < n; i++) {
int64_t in_col[N_MAX], out_col[N_MAX];
for (int j = 0; j < n; j++)
in_col[j] = tp_transposed[i*n + j];
cordic_dct_1d(in_col, out_col, n, rot, bpp, tc_mode, idct_mode);
for (int j = 0; j < n; j++)
dct_mem[i*n + j] = (int64_t)out_col[j];
}
print_matrix_int64("Final 2D DCT Result", dct_mem, n);
// Step 4: Quantization + Zigzag
//float_to_int64(dct_mem, fixed, n, rt_mode);
print_matrix_int64("DCT rounded to int64 (before quant)", fixed, n);
quantize_fixed(fixed, quantized, n,rt_mode, quant_matrix);
int dct_bits=(n/2+bpp);
for (int i = 0; i < n*n; ++i)
quantized[i] = limit_bits_signed(quantized[i], dct_bits);
print_matrix_int64("Quantized Matrix (int64)", quantized, n);
zigzag_traversal_int64(quantized, zigzag, n);
print_zigzag_int64("Zig-Zag Output Array", zigzag, n);
// Also reshape zigzag as n x n matrix (for easier visual)
for (int idx = 0; idx < n*n; idx++)
zigzag_matrix[idx] = zigzag[idx];
print_matrix_int64("Zig-Zag Output (as n x n matrix)", zigzag_matrix, n);
} return 0;
}
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>

#define FIXED_POINT_SCALE 256
#define CORDIC_NTAB 64
#define N_MAX 16

// === Function prototypes for printing helpers (updated) ===
void print_matrix(const char* title, float* mat, int n);
void print_matrix_double(const char* title, double* mat, int n);
void print_matrix_int64(const char* title, int64_t* mat, int n);
void print_zigzag_int64(const char* title, int64_t* arr, int n);

// === Quantization Matrix Selector ===
void select_quant_matrix(int n, int quant_matrix[N_MAX][N_MAX]) {
    int i, j;

    memset(quant_matrix, 0, sizeof(int) * N_MAX * N_MAX);

    if (n == 4) {
        int tmp[4][4] = {
            {16, 12, 34, 61},
            {14, 20, 57, 58},
            {22, 52, 95, 87},
            {72, 96, 104, 99}
        };
        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++)
                quant_matrix[i][j] = tmp[i][j];
    } else if (n == 6) {
        int tmp[6][6] = {
                {16, 11, 15, 27, 46, 61},
                {13, 13, 19, 35, 61, 55},
                {14, 18, 26, 54, 79, 61},
                {19, 30, 54, 77, 106, 80},
                {37, 57, 75, 97, 116, 97},
                {72, 93, 97, 109, 102, 99}
            };
        for (i = 0; i < 6; i++)
            for (j = 0; j < 6; j++)
                quant_matrix[i][j] = tmp[i][j];
    } else if (n == 8) {
        int tmp[8][8] = {
            {16, 11, 10, 16, 24, 40, 51, 61},
            {12, 12, 14, 19, 26, 58, 60, 55},
            {14, 13, 16, 24, 40, 57, 69, 56},
            {14, 17, 22, 29, 51, 87, 80, 62},
            {18, 22, 37, 56, 68, 109, 103, 77},
            {24, 35, 55, 64, 81, 104, 113, 92},
            {49, 64, 78, 87, 103, 121, 120, 101},
            {72, 92, 95, 98, 112, 100, 103, 99}
        };
        for (i = 0; i < 8; i++)
            for (j = 0; j < 8; j++)
                quant_matrix[i][j] = tmp[i][j];
    } else if (n == 10) {
        int tmp[10][10] = {
            {16, 12, 10, 12, 17, 23, 34, 45, 53, 61},
            {13, 12, 12, 15, 19, 25, 42, 55, 58, 56},
            {13, 13, 14, 17, 23, 32, 48, 61, 63, 56},
            {14, 14, 16, 20, 27, 41, 57, 69, 69, 58},
            {14, 17, 21, 26, 33, 50, 75, 86, 78, 64},
            {18, 20, 28, 40, 53, 64, 91, 104, 94, 75},
            {22, 28, 39, 52, 63, 75, 95, 107, 104, 87},
            {33, 43, 55, 67, 75, 88, 104, 113, 111, 96},
            {53, 65, 76, 84, 91, 103, 112, 116, 112, 101},
            {72, 87, 94, 96, 99, 110, 104, 101, 102, 99}
        };
        for (i = 0; i < 10; i++)
            for (j = 0; j < 10; j++)
                quant_matrix[i][j] = tmp[i][j];
    } else if (n == 12) {
        int tmp[12][12] = {
            {16, 13, 11, 10, 13, 17, 22, 30, 41, 48, 54, 61},
            {13, 12, 12, 12, 15, 19, 24, 35, 51, 55, 57, 57},
            {13, 12, 13, 14, 17, 22, 27, 40, 58, 61, 60, 55},
            {14, 13, 14, 16, 20, 26, 35, 46, 58, 65, 63, 56},
            {14, 15, 16, 19, 23, 29, 41, 56, 72, 74, 69, 59},
            {15, 17, 19, 24, 29, 36, 49, 68, 90, 86, 76, 64},
            {17, 20, 24, 32, 42, 52, 62, 80, 104, 100, 89, 74},
            {21, 25, 31, 42, 52, 62, 71, 87, 107, 107, 98, 83},
            {26, 32, 42, 55, 62, 69, 79, 92, 106, 111, 106, 93},
            {40, 49, 58, 69, 76, 83, 93, 105, 116, 118, 111, 98},
            {56, 66, 76, 83, 88, 93, 103, 109, 113, 113, 109, 100},
            {72, 84, 93, 95, 97, 100, 109, 106, 100, 102, 102, 99}
        };
        for (i = 0; i < 12; i++)
            for (j = 0; j < 12; j++)
                quant_matrix[i][j] = tmp[i][j];
    } else if (n == 14) {
        int tmp[14][14] = {
            {16, 13, 11, 10, 11, 14, 18, 22, 28, 37, 44, 50, 55, 61},
            {14, 12, 12, 12, 13, 16, 19, 23, 31, 44, 51, 55, 57, 58},
            {12, 12, 12, 13, 15, 18, 21, 25, 34, 51, 59, 60, 58, 55},
            {13, 13, 13, 14, 16, 20, 24, 31, 40, 53, 60, 65, 61, 56},
            {14, 14, 14, 15, 18, 22, 28, 37, 47, 57, 64, 70, 64, 57},
            {14, 15, 16, 18, 21, 25, 31, 42, 55, 71, 76, 76, 68, 60},
            {15, 16, 18, 22, 26, 31, 38, 49, 64, 85, 89, 85, 75, 65},
            {17, 19, 21, 28, 35, 43, 51, 60, 74, 96, 101, 98, 85, 73},
            {20, 23, 26, 35, 44, 53, 61, 68, 81, 101, 107, 106, 94, 81},
            {23, 28, 34, 43, 53, 59, 66, 75, 86, 100, 107, 111, 101, 90},
            {32, 38, 45, 55, 64, 69, 76, 85, 95, 107, 112, 115, 106, 95},
            {46, 54, 62, 70, 77, 82, 88, 97, 106, 117, 120, 119, 110, 100},
            {59, 67, 76, 82, 86, 90, 95, 103, 108, 110, 111, 112, 106, 100},
            {72, 82, 92, 94, 95, 97, 101, 109, 108, 102, 101, 103, 101, 99}
        };
        for (i = 0; i < 14; i++)
            for (j = 0; j < 14; j++)
                quant_matrix[i][j] = tmp[i][j];
    } else if (n == 16) {
        int tmp[16][16] = {
            {16, 13, 11, 11, 10, 12, 15, 18, 22, 27, 34, 41, 46, 52, 56, 61},
            {14, 13, 12, 12, 12, 13, 16, 19, 23, 28, 38, 49, 52, 55, 57, 58},
            {12, 12, 12, 13, 13, 15, 18, 20, 24, 30, 44, 57, 58, 59, 57, 55},
            {13, 13, 12, 13, 14, 17, 19, 23, 28, 35, 47, 58, 61, 63, 59, 55},
            {14, 13, 13, 14, 15, 18, 22, 26, 33, 41, 50, 58, 63, 67, 61, 56},
            {14, 14, 14, 16, 17, 20, 24, 29, 38, 47, 57, 67, 70, 71, 64, 58},
            {14, 15, 16, 18, 20, 23, 26, 32, 42, 54, 68, 80, 79, 76, 68, 61},
            {15, 16, 18, 21, 24, 28, 32, 39, 49, 61, 78, 91, 88, 84, 74, 66},
            {17, 18, 20, 25, 30, 37, 44, 51, 58, 69, 87, 102, 99, 94, 83, 73},
            {19, 21, 24, 30, 37, 45, 54, 61, 67, 77, 94, 108, 106, 103, 91, 80},
            {22, 25, 29, 36, 45, 52, 58, 65, 72, 82, 95, 106, 108, 108, 97, 87},
            {26, 31, 37, 45, 54, 60, 65, 71, 79, 88, 98, 107, 111, 112, 102, 93},
            {37, 43, 49, 57, 65, 71, 75, 81, 89, 97, 107, 114, 116, 116, 106, 97},
            {50, 57, 64, 71, 77, 82, 86, 92, 99, 107, 114, 119, 119, 117, 109, 101},
            {60, 68, 76, 81, 85, 89, 91, 96, 103, 108, 109, 109, 110, 110, 105, 100},
            {72, 81, 91, 93, 95, 96, 97, 102, 108, 109, 104, 100, 102, 103, 101, 99}
        };
        for (i = 0; i < 16; i++)
            for (j = 0; j < 16; j++)
                quant_matrix[i][j] = tmp[i][j];
    }
}

int rotations(int bpp){
	if(bpp>3 & bpp<9){
		return 16;
	}
	else if(bpp>9 & bpp<17){
		return 32;
	}
	else{
		return 55;
	}
}

static const __uint128_t atanh_fixed55[56] = {
0, 
231808622658467921920, 136844620538003570688, 72304939235532316672, 36703116139066482688, 8422781014094860288, 
9220371395095598080, 4611310773424436736, 2305796098435487488, 1152915640598518656, 576460019297349376, 
288230284525795200, 144115176622611392, 72057592606272224, 36028796840007000, 18014398487112362, 
9007199251944789, 4503599627020970, 2251799813641557, 1125899906837163, 562949953420629, 
281474976710571, 140737488355317, 70368744177663, 35184372088832, 17592186044416, 
8796093022208, 4398046511104, 2199023255552, 1099511627776, 549755813888, 
274877906944, 137438953472, 68719476736, 34359738368, 17179869184, 
8589934592, 4294967296, 2147483648, 1073741824, 536870912, 
268435456, 134217728, 67108864, 33554432, 16777216, 
8388608, 4194304, 2097152, 1048576, 524288, 
262144, 131072, 65536, 32768, 16384
};

const uint64_t k = 11201839480116572000ULL;

//uint64_t K = k >> (63 - TP_WR_WIDTH - 12);

int clog2_n2(int n) {
    int v = n*n, count = 0;
    while (v > 1) { v >>= 1; ++count; }
    return count + 1;
}

void print_matrix(const char* title, float* mat, int n) {
    printf("\n%s:\n", title);
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) printf("%8.4f ", mat[i*n + j]);
        printf("\n");
    }
}

void print_matrix_double(const char* title, double* mat, int n) {
    printf("\n%s (double):\n", title);
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) printf("%12.4f ", mat[i*n + j]);
        printf("\n");
    }
}

void print_matrix_int64(const char* title, int64_t* mat, int n) {
    printf("\n%s (int64_t):\n", title);
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++)
            printf("%12lld ", (long long)mat[i*n + j]);
        printf("\n");
    }
}

void print_zigzag_int64(const char* title, int64_t* arr, int n) {
    int total = n * n;
    printf("\n%s (length %d):\n", title, total);
    for (int i = 0; i < total; ++i) {
        printf("[%3d]=%6lld  ", i, (long long)arr[i]);
        if ((i + 1) % 8 == 0) printf("\n");
    }
    if (total % 8 != 0) printf("\n");
}

void cordic_rotation(uint64_t theta, int rot, int64_t* c, int64_t* s, int bpp,int tc_mode,int idct_mode) {
    uint64_t atanh_fixed[56];
    const uint64_t m_pi = 14488038916154245000LL;
    int TP_WR_WIDTH = bpp/2+bpp+3+(1-tc_mode)*(1-idct_mode);
    uint64_t K = k >> (64 - TP_WR_WIDTH - 12);
    for (int i = 1; i <= 56; i++) {
        atanh_fixed[i] = (uint64_t)(atanh_fixed55[i] >> (63 - TP_WR_WIDTH - 12));
    }
    while (theta < -m_pi) 
	theta += (m_pi << 1);
    while (theta > m_pi)
	theta -= (m_pi << 1);
    int sign = 1;
    if(theta < -(2*m_pi)){
	theta += m_pi;
	sign = -1;
    }
    else if(theta > (2*m_pi)){
	theta -= m_pi;
	sign = -1;
    }
    uint64_t x = K, y = 0, z = theta;
    for (int i = 0; i < rot; ++i){
	int64_t di = (z >= 0) ? 1 : -1;
	int64_t x_new = x - di * (y >> i);
	int64_t y_new = y + di * (x >> i);
	int64_t z_new = z - di * atanh_fixed[i];
	x = x_new;
	y = y_new;
	z = z_new;
    }
    *c = sign * x;
    *s = sign * y;
}

void cordic_dct_1d(const uint64_t* in, uint64_t* out, int n, int rot, int bpp, int tc_mode, int idct_mode) {
    static int64_t alpha[N_MAX];
    const uint64_t m_pi = 14488038916154245000LL;
    static int inited = 0;
    if (!inited){
	alpha[0] = sqrt(1 / n);
	for (int k = 1; k < n; ++k)
	    alpha[k] = sqrt(2 / n);
	inited = 1;
    }
    for (int k = 0; k < n; ++k) {
	int64_t acc = 0;
	for(int m = 0; m < n; ++m) {
	    int64_t angle = (m_pi * (2LL * m + 1LL) * k) / (2LL * n);
	    int64_t c, s;
	    cordic_rotation(angle, rot, &c, &s, bpp, tc_mode, idct_mode);
	    acc += in[m] * c;
	}
    out[k] = alpha[k] * acc;
    }
}

// quantize from int64_t (signed) to int64_t (signed)
void quantize_fixed(int64_t* in, int64_t* out, int n,int rt_mode, int quant_matrix[N_MAX][N_MAX]) {
    for (int i = 0; i < n; i++)
        for (int j = 0; j < n; j++) {
            int q = quant_matrix[i][j];
            if (q == 0) q = 1; // safety
            if(rt_mode==0) {
                double v = (double)in[i*n + j] / (double)q;
                out[i*n + j] = (int64_t)llround(v);
            }
            else{
                out[i*n+j]   = in[i*n+j]/q;
            }
        }
}

void zigzag_traversal_int64(int64_t* mat, int64_t* out, int n) {
    int i = 0, j = 0, index = 0, up = 1;
    for (int k = 0; k < n * n; k++) {
        out[index++] = mat[i*n + j];
        if (up) {
            if (j == n - 1)      { i++; up = 0; }
            else if (i == 0)     { j++; up = 0; }
            else                 { i--; j++; }
        } else {
            if (i == n - 1)      { j++; up = 1; }
            else if (j == 0)     { i++; up = 1; }
            else                 { i++; j--; }
        }
    }
}

static inline int64_t limit_bits_signed(int64_t x, int bits) {
    int64_t min, max;
    if (bits >= 63) return x;
    min = -(1LL << (bits - 1));
    max =  (1LL << (bits - 1)) - 1;
    if (x < min) x = min;
    else if (x > max) x = max;
    return x;
}

static inline int64_t inverse_limit_bits_signed(int64_t x, int bits)
{
    if (bits >= 63) return x;               // No limit needed

    int64_t max = (1LL << bits) - 1;        // Max unsigned value with N bits

    if (x < 0)
        return 0;                           // Negative → clamp to 0

    if (x > max)
        return max;                         // Overflow → clamp to max

    return x;                               // Within range
}

// === MAIN FUNCTION ===
int main() {
    int n = 8, bpp = 8;

    printf("Set matrix size n (even, 4–16, default 8): ");
    int tmpn;
    if (scanf("%d", &tmpn) == 1 && tmpn >= 4 && tmpn <= 16 && tmpn % 2 == 0)
        n = tmpn;

    printf("Set bits per pixel bpp (4–32, default 8): ");
    int tmpb;
    if (scanf("%d", &tmpb) == 1 && tmpb >= 4 && tmpb <= 32)
        bpp = tmpb;

    int tc_mode;
    printf("enter the tc_mode (0=normal signed, 1=normal signed – currently same): ");
    scanf("%d", &tc_mode);  // kept for interface; not changing behavior

    int idct_mode;
    printf("enter the idct_mode (0=DCT, 1=IDCT): ");
    scanf("%d", &idct_mode);

    int rt_mode;
    printf("enter the rt_mode (0 :rounded up 1:truncate): ");
    scanf("%d", &rt_mode);

    int addr_bits = clog2_n2(n);
    printf("For n=%d, RAM address width: %d bits\n", n, addr_bits);

    // Select quantization matrix
    int quant_matrix[N_MAX][N_MAX], rot;
    select_quant_matrix(n, quant_matrix);
    rot = rotations(bpp);
    printf("\n%d:\n", rot);

    // Effective input width (for range limit only)
    // int input_bits = (idct_mode == 0) ? bpp : (n/2 + bpp);
    // unsigned long long max_val = (input_bits >= 63) ? ((1ULL << 62) - 1ULL)
    //                                                 : ((1ULL << (input_bits - 1)) - 1ULL);

    // Buffers
    int64_t  dct_mem[N_MAX*N_MAX];        // generic float buffer (spatial or freq)
    int64_t  tp_mem[N_MAX*N_MAX];         // after row transform
    int64_t tp_transposed[N_MAX*N_MAX];  // transposed intermediate
    int64_t fixed[N_MAX*N_MAX];         // integer-rounded values
    int64_t quantized[N_MAX*N_MAX];     // quantized coefficients
    int64_t zigzag[N_MAX*N_MAX];        // zigzag order
    int64_t zigzag_matrix[N_MAX*N_MAX]; // zigzag array reshaped as matrix
    int64_t idct_matrix[N_MAX*N_MAX];   // de-zigzagged quantized
    int64_t idct_mem[N_MAX*N_MAX];      // dequantized freq-domain (int64)

    

    if (idct_mode == 0) {
        int done=0;
        // ==========================
        // Forward 2D DCT + Quant + Zigzag
        // ==========================
        int input_bits = (idct_mode == 0) ? bpp : (n/2 + bpp);
        unsigned long long max_val = (input_bits >= 64) ? ~0ULL : ((1ULL << input_bits) - 1ULL);
        printf("Enter %d input values (decimal, 0–%llu):\n", n * n, max_val);
        for (int addr = 0; addr < n * n; ++addr) {
            unsigned long long uval;
            if (scanf("%llu", &uval) != 1 || uval > max_val) {
                fprintf(stderr, "Input out of range (0–%llu)\n", max_val);
                return 1;
            }
    
            // Interpret as unsigned or signed (two's complement) depending on tc_mode
            if (tc_mode == 1) {
                dct_mem[addr] = (~uval+1) & ((((uint64_t)1 << input_bits) - 1));;
                printf("WR dct_mem[%d] = %.0f\n", addr, dct_mem[addr]);
                done=1;
                //int64_t sval = tc_to_signed(uval, input_bits);
                // dct_mem[addr] = twos_complement_to_signed(uval, input_bits);
                // printf("WR dct_mem[%d] = %lld (from 0x%llX, %d-bit two's complement)\n",
                //       addr, (long long)sval, uval, input_bits);
            } else {
            dct_mem[addr] = (int64_t)uval;
            printf("WR dct_mem[%d] = %.0f\n", addr, dct_mem[addr]);
            done=1;
        }
    }

        // Step 1: Row-wise DCT
        for (int i = 0; i < n; i++) {
            int64_t in_row[N_MAX], out_row[N_MAX];
            for (int j = 0; j < n; j++)
                in_row[j] = dct_mem[i*n + j];
            cordic_dct_1d(in_row, out_row, n, rot, bpp, tc_mode, idct_mode);
            for (int j = 0; j < n; j++)
                tp_mem[i*n + j] = (int64_t)out_row[j];
        }
        print_matrix_int64("1D DCT (Rows) Output", tp_mem, n);

        // Step 2: Transpose after rounding
        //float_to_int64(tp_mem, fixed, n, rt_mode);

        for (int i = 0; i < n; i++)
            for (int j = 0; j < n; j++)
                tp_transposed[i*n + j] = (int64_t)fixed[j*n + i];

        //print_matrix_double("Transpose after 1D DCT (double)", tp_transposed, n);

        // Step 3: Column-wise DCT on transposed
        for (int i = 0; i < n; i++) {
            int64_t in_col[N_MAX], out_col[N_MAX];
            for (int j = 0; j < n; j++)
                in_col[j] = tp_transposed[i*n + j];
            cordic_dct_1d(in_col, out_col, n, rot, bpp, tc_mode, idct_mode);
            for (int j = 0; j < n; j++)
                dct_mem[i*n + j] = (int64_t)out_col[j];
        }
        print_matrix_int64("Final 2D DCT Result", dct_mem, n);

        // Step 4: Quantization + Zigzag
        //float_to_int64(dct_mem, fixed, n, rt_mode);
        print_matrix_int64("DCT rounded to int64 (before quant)", fixed, n);

        quantize_fixed(fixed, quantized, n,rt_mode, quant_matrix);
        int dct_bits=(n/2+bpp);
        for (int i = 0; i < n*n; ++i)
            quantized[i] = limit_bits_signed(quantized[i], dct_bits);
        print_matrix_int64("Quantized Matrix (int64)", quantized, n);

        zigzag_traversal_int64(quantized, zigzag, n);
        print_zigzag_int64("Zig-Zag Output Array", zigzag, n);

        // Also reshape zigzag as n x n matrix (for easier visual)
        for (int idx = 0; idx < n*n; idx++)
            zigzag_matrix[idx] = zigzag[idx];
        print_matrix_int64("Zig-Zag Output (as n x n matrix)", zigzag_matrix, n);

    }     return 0;
}